Compare commits
10 Commits
replace_ag
...
genaicomps
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
50c5af5612 | ||
|
|
40be38f68b | ||
|
|
fb51d9f2ed | ||
|
|
be4e9ad000 | ||
|
|
fe90ca172f | ||
|
|
3146d5d69d | ||
|
|
744f7c9519 | ||
|
|
bac73f4e1a | ||
|
|
1a80dcf4d1 | ||
|
|
5d302d7501 |
4
.github/CODEOWNERS
vendored
4
.github/CODEOWNERS
vendored
@@ -1,4 +1,3 @@
|
||||
* liang1.lv@intel.com feng.tian@intel.com suyue.chen@intel.com
|
||||
/.github/ suyue.chen@intel.com ze.pan@intel.com
|
||||
/AgentQnA/ kaokao.lv@intel.com minmin.hou@intel.com
|
||||
/AudioQnA/ sihan.chen@intel.com wenjiao.yue@intel.com
|
||||
@@ -20,4 +19,5 @@
|
||||
/Text2Image/ wenjiao.yue@intel.com xinyu.ye@intel.com
|
||||
/Translation/ liang1.lv@intel.com sihan.chen@intel.com
|
||||
/VideoQnA/ huiling.bao@intel.com xinyao.wang@intel.com
|
||||
/VisualQnA/ liang1.lv@intel.com sihan.chen@intel.com
|
||||
/VisualQnA/ liang1.lv@intel.com sihan.chen@intel.com
|
||||
/*/ liang1.lv@intel.com feng.tian@intel.com suyue.chen@intel.com
|
||||
|
||||
18
.github/ISSUE_TEMPLATE/1_bug_template.yml
vendored
18
.github/ISSUE_TEMPLATE/1_bug_template.yml
vendored
@@ -66,7 +66,6 @@ body:
|
||||
options:
|
||||
- label: Pull docker images from hub.docker.com
|
||||
- label: Build docker images from source
|
||||
- label: Other
|
||||
validations:
|
||||
required: true
|
||||
|
||||
@@ -75,11 +74,10 @@ body:
|
||||
attributes:
|
||||
label: Deploy method
|
||||
options:
|
||||
- label: Docker compose
|
||||
- label: Docker
|
||||
- label: Docker Compose
|
||||
- label: Kubernetes Helm Charts
|
||||
- label: Kubernetes GMC
|
||||
- label: Other
|
||||
- label: Kubernetes
|
||||
- label: Helm
|
||||
validations:
|
||||
required: true
|
||||
|
||||
@@ -90,7 +88,6 @@ body:
|
||||
options:
|
||||
- Single Node
|
||||
- Multiple Nodes
|
||||
- Other
|
||||
default: 0
|
||||
validations:
|
||||
required: true
|
||||
@@ -130,12 +127,3 @@ body:
|
||||
render: shell
|
||||
validations:
|
||||
required: false
|
||||
|
||||
|
||||
- type: textarea
|
||||
id: attachments
|
||||
attributes:
|
||||
label: Attachments
|
||||
description: Attach any relevant files or screenshots.
|
||||
validations:
|
||||
required: false
|
||||
|
||||
@@ -66,7 +66,6 @@ body:
|
||||
options:
|
||||
- Single Node
|
||||
- Multiple Nodes
|
||||
- Other
|
||||
default: 0
|
||||
validations:
|
||||
required: true
|
||||
|
||||
1
.github/code_spell_ignore.txt
vendored
1
.github/code_spell_ignore.txt
vendored
@@ -1,3 +1,2 @@
|
||||
ModelIn
|
||||
modelin
|
||||
pressEnter
|
||||
2
.github/license_template.txt
vendored
2
.github/license_template.txt
vendored
@@ -1,2 +1,2 @@
|
||||
Copyright (C) 2025 Intel Corporation
|
||||
Copyright (C) 2024 Intel Corporation
|
||||
SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
65
.github/workflows/_build_comps_base_image.yml
vendored
65
.github/workflows/_build_comps_base_image.yml
vendored
@@ -1,65 +0,0 @@
|
||||
# Copyright (C) 2025 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
name: Build Comps Base Image
|
||||
permissions: read-all
|
||||
on:
|
||||
workflow_call:
|
||||
inputs:
|
||||
node:
|
||||
required: true
|
||||
type: string
|
||||
build:
|
||||
default: true
|
||||
required: false
|
||||
type: boolean
|
||||
tag:
|
||||
default: "latest"
|
||||
required: false
|
||||
type: string
|
||||
opea_branch:
|
||||
default: "main"
|
||||
required: false
|
||||
type: string
|
||||
inject_commit:
|
||||
default: false
|
||||
required: false
|
||||
type: boolean
|
||||
|
||||
jobs:
|
||||
pre-build-image-check:
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
should_skip: ${{ steps.check-skip.outputs.should_skip }}
|
||||
steps:
|
||||
- name: Check if job should be skipped
|
||||
id: check-skip
|
||||
run: |
|
||||
should_skip=false
|
||||
if [[ "${{ inputs.node }}" == "gaudi3" || "${{ inputs.node }}" == "rocm" || "${{ inputs.node }}" == "arc" ]]; then
|
||||
should_skip=true
|
||||
fi
|
||||
echo "should_skip=$should_skip"
|
||||
echo "should_skip=$should_skip" >> $GITHUB_OUTPUT
|
||||
|
||||
build-images:
|
||||
needs: [ pre-build-image-check ]
|
||||
if: ${{ needs.pre-build-image-check.outputs.should_skip == 'false' && fromJSON(inputs.build) }}
|
||||
runs-on: "docker-build-${{ inputs.node }}"
|
||||
steps:
|
||||
- name: Clean Up Working Directory
|
||||
run: sudo rm -rf ${{github.workspace}}/*
|
||||
|
||||
- name: Clone Required Repo
|
||||
run: |
|
||||
git clone --depth 1 --branch ${{ inputs.opea_branch }} https://github.com/opea-project/GenAIComps.git
|
||||
cd GenAIComps && git rev-parse HEAD && cd ../ && ls -l
|
||||
|
||||
- name: Build Image
|
||||
uses: opea-project/validation/actions/image-build@main
|
||||
with:
|
||||
work_dir: ${{ github.workspace }}/GenAIComps
|
||||
docker_compose_path: ${{ github.workspace }}/GenAIComps/.github/workflows/docker/compose/base-compose.yaml
|
||||
registry: ${OPEA_IMAGE_REPO}opea
|
||||
inject_commit: ${{ inputs.inject_commit }}
|
||||
tag: ${{ inputs.tag }}
|
||||
103
.github/workflows/_build_image.yml
vendored
103
.github/workflows/_build_image.yml
vendored
@@ -1,103 +0,0 @@
|
||||
# Copyright (C) 2025 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
name: Build Images
|
||||
permissions: read-all
|
||||
on:
|
||||
workflow_call:
|
||||
inputs:
|
||||
node:
|
||||
required: true
|
||||
type: string
|
||||
build:
|
||||
default: true
|
||||
required: false
|
||||
type: boolean
|
||||
example:
|
||||
required: true
|
||||
type: string
|
||||
services:
|
||||
default: ""
|
||||
required: false
|
||||
type: string
|
||||
tag:
|
||||
default: "latest"
|
||||
required: false
|
||||
type: string
|
||||
opea_branch:
|
||||
default: "main"
|
||||
required: false
|
||||
type: string
|
||||
inject_commit:
|
||||
default: false
|
||||
required: false
|
||||
type: boolean
|
||||
|
||||
jobs:
|
||||
pre-build-image-check:
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
should_skip: ${{ steps.check-skip.outputs.should_skip }}
|
||||
steps:
|
||||
- name: Check if job should be skipped
|
||||
id: check-skip
|
||||
run: |
|
||||
should_skip=false
|
||||
if [[ "${{ inputs.node }}" == "gaudi3" || "${{ inputs.node }}" == "rocm" || "${{ inputs.node }}" == "arc" ]]; then
|
||||
should_skip=true
|
||||
fi
|
||||
echo "should_skip=$should_skip"
|
||||
echo "should_skip=$should_skip" >> $GITHUB_OUTPUT
|
||||
|
||||
build-images:
|
||||
needs: [ pre-build-image-check ]
|
||||
if: ${{ needs.pre-build-image-check.outputs.should_skip == 'false' && fromJSON(inputs.build) }}
|
||||
runs-on: "docker-build-${{ inputs.node }}"
|
||||
steps:
|
||||
- name: Clean Up Working Directory
|
||||
run: sudo rm -rf ${{github.workspace}}/*
|
||||
|
||||
- name: Get Checkout Ref
|
||||
run: |
|
||||
if [ "${{ github.event_name }}" == "pull_request" ] || [ "${{ github.event_name }}" == "pull_request_target" ]; then
|
||||
echo "CHECKOUT_REF=refs/pull/${{ github.event.number }}/merge" >> $GITHUB_ENV
|
||||
else
|
||||
echo "CHECKOUT_REF=${{ github.ref }}" >> $GITHUB_ENV
|
||||
fi
|
||||
|
||||
- name: Checkout out GenAIExamples
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
ref: ${{ env.CHECKOUT_REF }}
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Clone Required Repo
|
||||
run: |
|
||||
cd ${{ github.workspace }}/${{ inputs.example }}/docker_image_build
|
||||
docker_compose_path=${{ github.workspace }}/${{ inputs.example }}/docker_image_build/build.yaml
|
||||
if [[ $(grep -c "vllm:" ${docker_compose_path}) != 0 ]]; then
|
||||
git clone https://github.com/vllm-project/vllm.git && cd vllm
|
||||
# Get the latest tag
|
||||
VLLM_VER=$(git describe --tags "$(git rev-list --tags --max-count=1)")
|
||||
echo "Check out vLLM tag ${VLLM_VER}"
|
||||
git checkout ${VLLM_VER} &> /dev/null && cd ../
|
||||
fi
|
||||
if [[ $(grep -c "vllm-gaudi:" ${docker_compose_path}) != 0 ]]; then
|
||||
git clone https://github.com/HabanaAI/vllm-fork.git && cd vllm-fork
|
||||
# Get the latest tag
|
||||
VLLM_VER=$(git describe --tags "$(git rev-list --tags --max-count=1)")
|
||||
echo "Check out vLLM tag ${VLLM_VER}"
|
||||
git checkout ${VLLM_VER} &> /dev/null && cd ../
|
||||
fi
|
||||
git clone --depth 1 --branch ${{ inputs.opea_branch }} https://github.com/opea-project/GenAIComps.git
|
||||
cd GenAIComps && git rev-parse HEAD && cd ../
|
||||
|
||||
- name: Build Image
|
||||
uses: opea-project/validation/actions/image-build@main
|
||||
with:
|
||||
work_dir: ${{ github.workspace }}/${{ inputs.example }}/docker_image_build
|
||||
docker_compose_path: ${{ github.workspace }}/${{ inputs.example }}/docker_image_build/build.yaml
|
||||
service_list: ${{ inputs.services }}
|
||||
registry: ${OPEA_IMAGE_REPO}opea
|
||||
inject_commit: ${{ inputs.inject_commit }}
|
||||
tag: ${{ inputs.tag }}
|
||||
76
.github/workflows/_example-workflow.yml
vendored
76
.github/workflows/_example-workflow.yml
vendored
@@ -28,7 +28,7 @@ on:
|
||||
default: false
|
||||
required: false
|
||||
type: boolean
|
||||
test_helmchart:
|
||||
test_k8s:
|
||||
default: false
|
||||
required: false
|
||||
type: boolean
|
||||
@@ -43,53 +43,83 @@ on:
|
||||
inject_commit:
|
||||
default: false
|
||||
required: false
|
||||
type: boolean
|
||||
use_model_cache:
|
||||
default: false
|
||||
required: false
|
||||
type: boolean
|
||||
type: string
|
||||
|
||||
jobs:
|
||||
####################################################################################################
|
||||
# Image Build
|
||||
####################################################################################################
|
||||
build-images:
|
||||
uses: ./.github/workflows/_build_image.yml
|
||||
with:
|
||||
node: ${{ inputs.node }}
|
||||
build: ${{ fromJSON(inputs.build) }}
|
||||
example: ${{ inputs.example }}
|
||||
services: ${{ inputs.services }}
|
||||
tag: ${{ inputs.tag }}
|
||||
opea_branch: ${{ inputs.opea_branch }}
|
||||
inject_commit: ${{ inputs.inject_commit }}
|
||||
runs-on: "docker-build-${{ inputs.node }}"
|
||||
steps:
|
||||
- name: Clean Up Working Directory
|
||||
run: sudo rm -rf ${{github.workspace}}/*
|
||||
|
||||
- name: Get Checkout Ref
|
||||
run: |
|
||||
if [ "${{ github.event_name }}" == "pull_request" ] || [ "${{ github.event_name }}" == "pull_request_target" ]; then
|
||||
echo "CHECKOUT_REF=refs/pull/${{ github.event.number }}/merge" >> $GITHUB_ENV
|
||||
else
|
||||
echo "CHECKOUT_REF=${{ github.ref }}" >> $GITHUB_ENV
|
||||
fi
|
||||
|
||||
- name: Checkout out GenAIExamples
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
ref: ${{ env.CHECKOUT_REF }}
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Clone Required Repo
|
||||
run: |
|
||||
cd ${{ github.workspace }}/${{ inputs.example }}/docker_image_build
|
||||
docker_compose_path=${{ github.workspace }}/${{ inputs.example }}/docker_image_build/build.yaml
|
||||
if [[ $(grep -c "vllm:" ${docker_compose_path}) != 0 ]]; then
|
||||
git clone https://github.com/vllm-project/vllm.git
|
||||
cd vllm && git rev-parse HEAD && cd ../
|
||||
fi
|
||||
if [[ $(grep -c "vllm-gaudi:" ${docker_compose_path}) != 0 ]]; then
|
||||
git clone https://github.com/HabanaAI/vllm-fork.git
|
||||
cd vllm-fork && git checkout 3c39626 && cd ../
|
||||
fi
|
||||
git clone https://github.com/opea-project/GenAIComps.git
|
||||
cd GenAIComps && git checkout ${{ inputs.opea_branch }} && git rev-parse HEAD && cd ../
|
||||
|
||||
- name: Build Image
|
||||
if: ${{ fromJSON(inputs.build) }}
|
||||
uses: opea-project/validation/actions/image-build@main
|
||||
with:
|
||||
work_dir: ${{ github.workspace }}/${{ inputs.example }}/docker_image_build
|
||||
docker_compose_path: ${{ github.workspace }}/${{ inputs.example }}/docker_image_build/build.yaml
|
||||
service_list: ${{ inputs.services }}
|
||||
registry: ${OPEA_IMAGE_REPO}opea
|
||||
inject_commit: ${{ inputs.inject_commit }}
|
||||
tag: ${{ inputs.tag }}
|
||||
|
||||
####################################################################################################
|
||||
# Docker Compose Test
|
||||
####################################################################################################
|
||||
test-example-compose:
|
||||
needs: [build-images]
|
||||
if: ${{ inputs.test_compose }}
|
||||
if: ${{ fromJSON(inputs.test_compose) }}
|
||||
uses: ./.github/workflows/_run-docker-compose.yml
|
||||
with:
|
||||
tag: ${{ inputs.tag }}
|
||||
example: ${{ inputs.example }}
|
||||
hardware: ${{ inputs.node }}
|
||||
use_model_cache: ${{ inputs.use_model_cache }}
|
||||
secrets: inherit
|
||||
|
||||
|
||||
####################################################################################################
|
||||
# helmchart Test
|
||||
# K8S Test
|
||||
####################################################################################################
|
||||
test-helmchart:
|
||||
if: ${{ fromJSON(inputs.test_helmchart) }}
|
||||
uses: ./.github/workflows/_helm-e2e.yml
|
||||
test-k8s-manifest:
|
||||
needs: [build-images]
|
||||
if: ${{ fromJSON(inputs.test_k8s) }}
|
||||
uses: ./.github/workflows/_manifest-e2e.yml
|
||||
with:
|
||||
example: ${{ inputs.example }}
|
||||
hardware: ${{ inputs.node }}
|
||||
tag: ${{ inputs.tag }}
|
||||
mode: "CD"
|
||||
secrets: inherit
|
||||
|
||||
####################################################################################################
|
||||
@@ -97,7 +127,7 @@ jobs:
|
||||
####################################################################################################
|
||||
test-gmc-pipeline:
|
||||
needs: [build-images]
|
||||
if: false # ${{ fromJSON(inputs.test_gmc) }}
|
||||
if: ${{ fromJSON(inputs.test_gmc) }}
|
||||
uses: ./.github/workflows/_gmc-e2e.yml
|
||||
with:
|
||||
example: ${{ inputs.example }}
|
||||
|
||||
4
.github/workflows/_get-test-matrix.yml
vendored
4
.github/workflows/_get-test-matrix.yml
vendored
@@ -60,11 +60,9 @@ jobs:
|
||||
base_commit=$(git rev-parse HEAD~1) # push event
|
||||
fi
|
||||
merged_commit=$(git log -1 --format='%H')
|
||||
echo "print all changed files..."
|
||||
git diff --name-only ${base_commit} ${merged_commit}
|
||||
changed_files="$(git diff --name-only ${base_commit} ${merged_commit} | \
|
||||
grep -vE '${{ inputs.diff_excluded_files }}')" || true
|
||||
echo "filtered changed_files=$changed_files"
|
||||
echo "changed_files=$changed_files"
|
||||
export changed_files=$changed_files
|
||||
export test_mode=${{ inputs.test_mode }}
|
||||
export WORKSPACE=${{ github.workspace }}
|
||||
|
||||
234
.github/workflows/_helm-e2e.yml
vendored
234
.github/workflows/_helm-e2e.yml
vendored
@@ -1,234 +0,0 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
name: Helm Chart E2e Test For Call
|
||||
permissions: read-all
|
||||
on:
|
||||
workflow_call:
|
||||
inputs:
|
||||
example:
|
||||
default: "chatqna"
|
||||
required: true
|
||||
type: string
|
||||
description: "example to test, chatqna or common/asr"
|
||||
hardware:
|
||||
default: "xeon"
|
||||
required: true
|
||||
type: string
|
||||
dockerhub:
|
||||
default: "false"
|
||||
required: false
|
||||
type: string
|
||||
description: "Set to true if you want to use released docker images at dockerhub. By default using internal docker registry."
|
||||
mode:
|
||||
default: "CD"
|
||||
description: "Whether the test range is CI, CD or CICD"
|
||||
required: false
|
||||
type: string
|
||||
tag:
|
||||
default: "latest"
|
||||
required: false
|
||||
type: string
|
||||
version:
|
||||
default: "0-latest"
|
||||
required: false
|
||||
type: string
|
||||
|
||||
jobs:
|
||||
get-test-case:
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
value_files: ${{ steps.get-test-files.outputs.value_files }}
|
||||
CHECKOUT_REF: ${{ steps.get-checkout-ref.outputs.CHECKOUT_REF }}
|
||||
steps:
|
||||
- name: Get checkout ref
|
||||
id: get-checkout-ref
|
||||
run: |
|
||||
if [ "${{ github.event_name }}" == "pull_request" ] || [ "${{ github.event_name }}" == "pull_request_target" ]; then
|
||||
CHECKOUT_REF=refs/pull/${{ github.event.number }}/merge
|
||||
else
|
||||
CHECKOUT_REF=${{ github.ref }}
|
||||
fi
|
||||
echo "CHECKOUT_REF=${CHECKOUT_REF}" >> $GITHUB_OUTPUT
|
||||
echo "checkout ref ${CHECKOUT_REF}"
|
||||
|
||||
- name: Checkout Repo
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
ref: ${{ steps.get-checkout-ref.outputs.CHECKOUT_REF }}
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Get test Services
|
||||
id: get-test-files
|
||||
run: |
|
||||
set -x
|
||||
if [ "${{ inputs.mode }}" = "CI" ]; then
|
||||
base_commit=${{ github.event.pull_request.base.sha }}
|
||||
merged_commit=$(git log -1 --format='%H')
|
||||
values_files=$(git diff --name-only ${base_commit} ${merged_commit} | \
|
||||
grep "${{ inputs.example }}/kubernetes/helm" | \
|
||||
grep "values.yaml" |\
|
||||
sort -u)
|
||||
echo $values_files
|
||||
elif [ "${{ inputs.mode }}" = "CD" ]; then
|
||||
values_files=$(ls ${{ inputs.example }}/kubernetes/helm/*values.yaml || true)
|
||||
fi
|
||||
value_files="["
|
||||
for file in ${values_files}; do
|
||||
if [ -f "$file" ]; then
|
||||
filename=$(basename "$file")
|
||||
if [[ "$filename" == *"gaudi"* ]]; then
|
||||
if [[ "${{ inputs.hardware }}" == "gaudi" ]]; then
|
||||
value_files="${value_files}\"${filename}\","
|
||||
fi
|
||||
elif [[ "$filename" == *"nv"* ]]; then
|
||||
continue
|
||||
else
|
||||
if [[ "${{ inputs.hardware }}" == "xeon" ]]; then
|
||||
value_files="${value_files}\"${filename}\","
|
||||
fi
|
||||
fi
|
||||
fi
|
||||
done
|
||||
value_files="${value_files%,}]"
|
||||
|
||||
echo "value_files=${value_files}"
|
||||
echo "value_files=${value_files}" >> $GITHUB_OUTPUT
|
||||
|
||||
helm-test:
|
||||
needs: [get-test-case]
|
||||
if: ${{ needs.get-test-case.outputs.value_files != '[]' }}
|
||||
strategy:
|
||||
matrix:
|
||||
value_file: ${{ fromJSON(needs.get-test-case.outputs.value_files) }}
|
||||
fail-fast: false
|
||||
runs-on: k8s-${{ inputs.hardware }}
|
||||
continue-on-error: true
|
||||
steps:
|
||||
- name: Clean Up Working Directory
|
||||
run: |
|
||||
echo "value_file=${{ matrix.value_file }}"
|
||||
sudo rm -rf ${{github.workspace}}/*
|
||||
|
||||
- name: Get checkout ref
|
||||
id: get-checkout-ref
|
||||
run: |
|
||||
if [ "${{ github.event_name }}" == "pull_request" ] || [ "${{ github.event_name }}" == "pull_request_target" ]; then
|
||||
CHECKOUT_REF=refs/pull/${{ github.event.number }}/merge
|
||||
else
|
||||
CHECKOUT_REF=${{ github.ref }}
|
||||
fi
|
||||
echo "CHECKOUT_REF=${CHECKOUT_REF}" >> $GITHUB_OUTPUT
|
||||
echo "checkout ref ${CHECKOUT_REF}"
|
||||
|
||||
- name: Checkout Repo
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
ref: ${{ steps.get-checkout-ref.outputs.CHECKOUT_REF }}
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Set variables
|
||||
env:
|
||||
example: ${{ inputs.example }}
|
||||
run: |
|
||||
CHART_NAME="${example,,}" # CodeGen
|
||||
echo "CHART_NAME=$CHART_NAME" >> $GITHUB_ENV
|
||||
echo "RELEASE_NAME=${CHART_NAME}$(date +%Y%m%d%H%M%S)" >> $GITHUB_ENV
|
||||
echo "NAMESPACE=${CHART_NAME}-$(head -c 4 /dev/urandom | xxd -p)" >> $GITHUB_ENV
|
||||
echo "ROLLOUT_TIMEOUT_SECONDS=600s" >> $GITHUB_ENV
|
||||
echo "TEST_TIMEOUT_SECONDS=600s" >> $GITHUB_ENV
|
||||
echo "KUBECTL_TIMEOUT_SECONDS=60s" >> $GITHUB_ENV
|
||||
echo "should_cleanup=false" >> $GITHUB_ENV
|
||||
echo "skip_validate=false" >> $GITHUB_ENV
|
||||
echo "CHART_FOLDER=${example}/kubernetes/helm" >> $GITHUB_ENV
|
||||
|
||||
- name: Helm install
|
||||
id: install
|
||||
env:
|
||||
GOOGLE_CSE_ID: ${{ secrets.GOOGLE_CSE_ID }}
|
||||
GOOGLE_API_KEY: ${{ secrets.GOOGLE_API_KEY }}
|
||||
HUGGINGFACEHUB_API_TOKEN: ${{ secrets.HUGGINGFACEHUB_API_TOKEN }}
|
||||
HFTOKEN: ${{ secrets.HUGGINGFACEHUB_API_TOKEN }}
|
||||
value_file: ${{ matrix.value_file }}
|
||||
run: |
|
||||
set -xe
|
||||
echo "should_cleanup=true" >> $GITHUB_ENV
|
||||
if [[ ! -f ${{ github.workspace }}/${{ env.CHART_FOLDER }}/${value_file} ]]; then
|
||||
echo "No value file found, exiting test!"
|
||||
echo "skip_validate=true" >> $GITHUB_ENV
|
||||
echo "should_cleanup=false" >> $GITHUB_ENV
|
||||
exit 0
|
||||
fi
|
||||
|
||||
for img in `helm template -n $NAMESPACE $RELEASE_NAME oci://ghcr.io/opea-project/charts/${CHART_NAME} -f ${{ inputs.example }}/kubernetes/helm/${value_file} --version ${{ inputs.version }} | grep 'image:' | grep 'opea/' | awk '{print $2}' | xargs`;
|
||||
do
|
||||
# increase helm install wait for for vllm-gaudi case
|
||||
if [[ $img == *"vllm-gaudi"* ]]; then
|
||||
ROLLOUT_TIMEOUT_SECONDS=900s
|
||||
fi
|
||||
done
|
||||
if ! helm install \
|
||||
--create-namespace \
|
||||
--namespace $NAMESPACE \
|
||||
$RELEASE_NAME \
|
||||
oci://ghcr.io/opea-project/charts/${CHART_NAME} \
|
||||
--set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} \
|
||||
--set global.modelUseHostPath=/data2/hf_model \
|
||||
--set GOOGLE_API_KEY=${{ env.GOOGLE_API_KEY}} \
|
||||
--set GOOGLE_CSE_ID=${{ env.GOOGLE_CSE_ID}} \
|
||||
--set web-retriever.GOOGLE_API_KEY=${{ env.GOOGLE_API_KEY}} \
|
||||
--set web-retriever.GOOGLE_CSE_ID=${{ env.GOOGLE_CSE_ID}} \
|
||||
-f ${{ inputs.example }}/kubernetes/helm/${value_file} \
|
||||
--version ${{ inputs.version }} \
|
||||
--wait --timeout "$ROLLOUT_TIMEOUT_SECONDS"; then
|
||||
echo "Failed to install chart ${{ inputs.example }}"
|
||||
echo "skip_validate=true" >> $GITHUB_ENV
|
||||
.github/workflows/scripts/k8s-utils.sh dump_pods_status $NAMESPACE
|
||||
exit 1
|
||||
fi
|
||||
|
||||
- name: Validate e2e test
  if: always()
  run: |
    # Runs `helm test` against the release installed by the previous step and
    # fails the step when the test command fails or the captured log reports a
    # failed phase. Skipped entirely when an earlier step set skip_validate=true.
    set -xe
    if $skip_validate; then
      echo "Skip validate"
    else
      LOG_PATH=/home/$(whoami)/helm-logs
      chart=${{ env.CHART_NAME }}
      # tee cannot create the log file if the directory is missing.
      mkdir -p "${LOG_PATH}"

      # The step runs under plain `bash -e` (no pipefail), so `$?` after the
      # pipeline used to be tee's status and a failing `helm test` went
      # unnoticed. Enable pipefail for this pipeline and capture the status
      # with `||` so that `set -e` does not abort before it is inspected.
      exit_code=0
      set -o pipefail
      helm test -n $NAMESPACE $RELEASE_NAME --logs --timeout "$TEST_TIMEOUT_SECONDS" | tee ${LOG_PATH}/charts-${chart}.log || exit_code=$?
      set +o pipefail
      if [ $exit_code -ne 0 ]; then
        echo "Chart ${chart} test failed, please check the logs in ${LOG_PATH}!"
        # Best-effort diagnostics; never mask the real failure.
        ${{ github.workspace }}/.github/workflows/scripts/k8s-utils.sh dump_all_pod_logs $NAMESPACE || true
        exit 1
      fi

      echo "Checking response results, make sure the output is reasonable. "
      teststatus=false
      # A "Phase: ... Failed" line in the captured log marks a failed test pod
      # even when helm itself exited successfully.
      if [[ -f $LOG_PATH/charts-${chart}.log ]] && \
      [[ $(grep -c "^Phase:.*Failed" $LOG_PATH/charts-${chart}.log) != 0 ]]; then
        teststatus=false
        ${{ github.workspace }}/.github/workflows/scripts/k8s-utils.sh dump_all_pod_logs $NAMESPACE
      else
        teststatus=true
      fi

      if [ $teststatus == false ]; then
        echo "Response check failed, please check the logs in artifacts!"
        exit 1
      else
        echo "Response check succeeded!"
        exit 0
      fi
    fi
|
||||
|
||||
- name: Helm uninstall
|
||||
if: always()
|
||||
run: |
|
||||
if $should_cleanup; then
|
||||
helm uninstall $RELEASE_NAME --namespace $NAMESPACE
|
||||
if ! kubectl delete ns $NAMESPACE --timeout=$KUBECTL_TIMEOUT_SECONDS; then
|
||||
kubectl delete pods --namespace $NAMESPACE --force --grace-period=0 --all
|
||||
kubectl delete ns $NAMESPACE --force --grace-period=0 --timeout=$KUBECTL_TIMEOUT_SECONDS
|
||||
fi
|
||||
fi
|
||||
185
.github/workflows/_manifest-e2e.yml
vendored
Normal file
185
.github/workflows/_manifest-e2e.yml
vendored
Normal file
@@ -0,0 +1,185 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
name: Single Kubernetes Manifest E2e Test For Call
|
||||
on:
|
||||
workflow_call:
|
||||
inputs:
|
||||
example:
|
||||
default: "ChatQnA"
|
||||
description: "The example to test on K8s"
|
||||
required: true
|
||||
type: string
|
||||
hardware:
|
||||
default: "xeon"
|
||||
description: "Nodes to run the test, xeon or gaudi"
|
||||
required: true
|
||||
type: string
|
||||
tag:
|
||||
default: "latest"
|
||||
description: "Tag to apply to images, default is latest"
|
||||
required: false
|
||||
type: string
|
||||
|
||||
jobs:
|
||||
get-test-case:
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
test_cases: ${{ steps.test-case-matrix.outputs.test_cases }}
|
||||
CHECKOUT_REF: ${{ steps.get-checkout-ref.outputs.CHECKOUT_REF }}
|
||||
steps:
|
||||
- name: Get checkout ref
|
||||
id: get-checkout-ref
|
||||
run: |
|
||||
if [ "${{ github.event_name }}" == "pull_request" ] || [ "${{ github.event_name }}" == "pull_request_target" ]; then
|
||||
CHECKOUT_REF=refs/pull/${{ github.event.number }}/merge
|
||||
else
|
||||
CHECKOUT_REF=${{ github.ref }}
|
||||
fi
|
||||
echo "CHECKOUT_REF=${CHECKOUT_REF}" >> $GITHUB_OUTPUT
|
||||
echo "checkout ref ${CHECKOUT_REF}"
|
||||
|
||||
- name: Checkout out Repo
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
ref: ${{ steps.get-checkout-ref.outputs.CHECKOUT_REF }}
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Get test matrix
  shell: bash
  id: test-case-matrix
  env:
    # Passed through the environment instead of being interpolated into the
    # script text.
    GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
  run: |
    # Builds the JSON list of manifest test scripts to run for this example and
    # hardware, and publishes it as the `test_cases` step output.
    #   - test_manifest_on_<hardware>.sh is always selected when present.
    #   - test_manifest_<variant>_on_<hardware>.sh is always selected unless
    #     tag == "ci"; in CI it is selected only when a changed file under the
    #     example mentions that variant.
    cd ${{ github.workspace }}/${{ inputs.example }}/tests
    run_test_cases=""

    default_test_case=$(find . -type f -name "test_manifest_on_${{ inputs.hardware }}.sh" | cut -d/ -f2)
    if [ "$default_test_case" ]; then run_test_cases="$default_test_case"; fi
    other_test_cases=$(find . -type f -name "test_manifest_*_on_${{ inputs.hardware }}.sh" | cut -d/ -f2)
    echo "default_test_case=$default_test_case"
    echo "other_test_cases=$other_test_cases"

    if [ "${{ inputs.tag }}" == "ci" ]; then
      base_commit=$(curl -H "Authorization: token ${GH_TOKEN}" \
        "https://api.github.com/repos/opea-project/GenAIExamples/commits?sha=${{ github.event.pull_request.base.ref }}" | jq -r '.[0].sha')
      merged_commit=$(git log -1 --format='%H')
      # This workflow declares no `diff_excluded_files` input, so the expression
      # expands to an empty string. `grep -vE ''` matches (and therefore drops)
      # every line, which left changed_files empty; only filter when a pattern
      # is actually provided.
      excluded_pattern='${{ inputs.diff_excluded_files }}'
      if [ -n "$excluded_pattern" ]; then
        changed_files="$(git diff --name-only ${base_commit} ${merged_commit} | grep -vE "$excluded_pattern")" || true
      else
        changed_files="$(git diff --name-only ${base_commit} ${merged_commit})" || true
      fi
    fi

    for test_case in $other_test_cases; do
      if [ "${{ inputs.tag }}" == "ci" ]; then
        # test_manifest_<variant>_on_<hardware>.sh -> <variant>
        flag=${test_case%_on_*}
        # The scripts found above are named test_manifest_*, so that is the
        # prefix to strip (stripping test_compose_ never matched anything).
        flag=${flag#test_manifest_}
        if [[ $(printf '%s\n' "$changed_files" | grep ${{ inputs.example }} | grep "${flag}") ]]; then
          run_test_cases="$run_test_cases $test_case"
        fi
      else
        run_test_cases="$run_test_cases $test_case"
      fi
    done

    # One script name per line -> sorted, de-duplicated JSON array.
    test_cases=$(echo $run_test_cases | tr ' ' '\n' | sort -u | jq -R '.' | jq -sc '.')
    echo "test_cases=$test_cases"
    echo "test_cases=$test_cases" >> $GITHUB_OUTPUT
|
||||
|
||||
manifest-test:
|
||||
needs: [get-test-case]
|
||||
strategy:
|
||||
matrix:
|
||||
test_case: ${{ fromJSON(needs.get-test-case.outputs.test_cases) }}
|
||||
fail-fast: false
|
||||
runs-on: "k8s-${{ inputs.hardware }}"
|
||||
continue-on-error: true
|
||||
steps:
|
||||
- name: Clean Up Working Directory
|
||||
run: sudo rm -rf ${{github.workspace}}/*
|
||||
|
||||
- name: Get checkout ref
  run: |
    # Pull-request events check out the PR merge ref; every other event checks
    # out the ref that triggered the run.
    if [ "${{ github.event_name }}" == "pull_request" ] || [ "${{ github.event_name }}" == "pull_request_target" ]; then
      CHECKOUT_REF="refs/pull/${{ github.event.number }}/merge"
    else
      CHECKOUT_REF="${{ github.ref }}"
    fi
    echo "CHECKOUT_REF=${CHECKOUT_REF}" >> $GITHUB_ENV
    # Values appended to GITHUB_ENV only become visible in later steps, so the
    # env context is still empty here; log the shell variable instead.
    echo "checkout ref ${CHECKOUT_REF}"
|
||||
|
||||
- name: Checkout out Repo
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
ref: ${{ env.CHECKOUT_REF }}
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Set variables
  env:
    test_case: ${{ matrix.test_case }}
  run: |
    # Exports the image coordinates, a unique namespace, the timeouts and the
    # control flags read by the install / validate / uninstall steps.
    echo "IMAGE_REPO=${OPEA_IMAGE_REPO}opea" >> $GITHUB_ENV
    echo "IMAGE_TAG=${{ inputs.tag }}" >> $GITHUB_ENV
    lower_example=$(echo "${{ inputs.example }}" | tr '[:upper:]' '[:lower:]')
    # Third "_"-separated field of the script name, prefixed with "-":
    #   test_manifest_<variant>_on_<hw>.sh -> "-<variant>"
    #   test_manifest_on_<hw>.sh           -> ""   (the field is exactly "on")
    # -x makes grep drop only the literal "on" field, not every variant whose
    # name merely contains "on".
    name=$(echo "$test_case" | cut -d/ -f2 | cut -d'_' -f3- |cut -d'_' -f1 | grep -vx 'on' | sed 's/^/-/')
    # Build the namespace in a shell variable first: a value appended to
    # GITHUB_ENV is not visible in this step, so the log line at the end would
    # otherwise print an empty value.
    NAMESPACE="$lower_example$name-$(tr -dc a-z0-9 </dev/urandom | head -c 16)"
    echo "NAMESPACE=$NAMESPACE" >> $GITHUB_ENV
    echo "ROLLOUT_TIMEOUT_SECONDS=1800s" >> $GITHUB_ENV
    echo "KUBECTL_TIMEOUT_SECONDS=60s" >> $GITHUB_ENV
    echo "continue_test=true" >> $GITHUB_ENV
    echo "should_cleanup=false" >> $GITHUB_ENV
    echo "skip_validate=true" >> $GITHUB_ENV
    echo "NAMESPACE=$NAMESPACE"
|
||||
|
||||
- name: Kubectl install
|
||||
id: install
|
||||
env:
|
||||
test_case: ${{ matrix.test_case }}
|
||||
run: |
|
||||
set -x
|
||||
echo "test_case=$test_case"
|
||||
if [[ ! -f ${{ github.workspace }}/${{ inputs.example }}/tests/${test_case} ]]; then
|
||||
echo "No test script found, exist test!"
|
||||
exit 0
|
||||
else
|
||||
${{ github.workspace }}/${{ inputs.example }}/tests/${test_case} init_${{ inputs.example }}
|
||||
echo "should_cleanup=true" >> $GITHUB_ENV
|
||||
kubectl create ns $NAMESPACE
|
||||
${{ github.workspace }}/${{ inputs.example }}/tests/${test_case} install_${{ inputs.example }} $NAMESPACE
|
||||
echo "Testing ${{ inputs.example }}, waiting for pod ready..."
|
||||
if kubectl rollout status deployment --namespace "$NAMESPACE" --timeout "$ROLLOUT_TIMEOUT_SECONDS"; then
|
||||
echo "Testing manifests ${{ inputs.example }}, waiting for pod ready done!"
|
||||
echo "skip_validate=false" >> $GITHUB_ENV
|
||||
else
|
||||
echo "Timeout waiting for pods in namespace $NAMESPACE to be ready!"
|
||||
.github/workflows/scripts/k8s-utils.sh dump_pods_status $NAMESPACE
|
||||
exit 1
|
||||
fi
|
||||
sleep 60
|
||||
fi
|
||||
|
||||
- name: Validate e2e test
|
||||
if: always()
|
||||
env:
|
||||
test_case: ${{ matrix.test_case }}
|
||||
run: |
|
||||
if $skip_validate; then
|
||||
echo "Skip validate"
|
||||
else
|
||||
if ${{ github.workspace }}/${{ inputs.example }}/tests/${test_case} validate_${{ inputs.example }} $NAMESPACE ; then
|
||||
echo "Validate ${test_case} successful!"
|
||||
else
|
||||
echo "Validate ${test_case} failure!!!"
|
||||
echo "Check the logs in 'Dump logs when e2e test failed' step!!!"
|
||||
exit 1
|
||||
fi
|
||||
fi
|
||||
|
||||
- name: Dump logs when e2e test failed
|
||||
if: failure()
|
||||
run: |
|
||||
.github/workflows/scripts/k8s-utils.sh dump_all_pod_logs $NAMESPACE
|
||||
|
||||
- name: Kubectl uninstall
|
||||
if: always()
|
||||
run: |
|
||||
if $should_cleanup; then
|
||||
if ! kubectl delete ns $NAMESPACE --timeout=$KUBECTL_TIMEOUT_SECONDS; then
|
||||
kubectl delete pods --namespace $NAMESPACE --force --grace-period=0 --all
|
||||
kubectl delete ns $NAMESPACE --force --grace-period=0 --timeout=$KUBECTL_TIMEOUT_SECONDS
|
||||
fi
|
||||
fi
|
||||
44
.github/workflows/_run-docker-compose.yml
vendored
44
.github/workflows/_run-docker-compose.yml
vendored
@@ -28,10 +28,6 @@ on:
|
||||
required: false
|
||||
type: string
|
||||
default: ""
|
||||
use_model_cache:
|
||||
required: false
|
||||
type: boolean
|
||||
default: false
|
||||
jobs:
|
||||
get-test-case:
|
||||
runs-on: ubuntu-latest
|
||||
@@ -64,14 +60,9 @@ jobs:
|
||||
cd ${{ github.workspace }}/${{ inputs.example }}/tests
|
||||
run_test_cases=""
|
||||
|
||||
if [ "${{ inputs.hardware }}" == "gaudi2" ] || [ "${{ inputs.hardware }}" == "gaudi3" ]; then
|
||||
hardware="gaudi"
|
||||
else
|
||||
hardware="${{ inputs.hardware }}"
|
||||
fi
|
||||
default_test_case=$(find . -type f -name "test_compose_on_$hardware.sh" | cut -d/ -f2)
|
||||
default_test_case=$(find . -type f -name "test_compose_on_${{ inputs.hardware }}.sh" | cut -d/ -f2)
|
||||
if [ "$default_test_case" ]; then run_test_cases="$default_test_case"; fi
|
||||
other_test_cases=$(find . -type f -name "test_compose_*_on_$hardware.sh" | cut -d/ -f2)
|
||||
other_test_cases=$(find . -type f -name "test_compose_*_on_${{ inputs.hardware }}.sh" | cut -d/ -f2)
|
||||
echo "default_test_case=$default_test_case"
|
||||
echo "other_test_cases=$other_test_cases"
|
||||
|
||||
@@ -94,17 +85,12 @@ jobs:
|
||||
fi
|
||||
done
|
||||
|
||||
if [ -z "$run_test_cases" ] && [[ $(printf '%s\n' "${changed_files[@]}" | grep ${{ inputs.example }} | grep /tests/) ]]; then
|
||||
run_test_cases=$other_test_cases
|
||||
fi
|
||||
|
||||
test_cases=$(echo $run_test_cases | tr ' ' '\n' | sort -u | jq -R '.' | jq -sc '.')
|
||||
echo "test_cases=$test_cases"
|
||||
echo "test_cases=$test_cases" >> $GITHUB_OUTPUT
|
||||
|
||||
compose-test:
|
||||
run-test:
|
||||
needs: [get-test-case]
|
||||
if: ${{ needs.get-test-case.outputs.test_cases != '[""]' }}
|
||||
strategy:
|
||||
matrix:
|
||||
test_case: ${{ fromJSON(needs.get-test-case.outputs.test_cases) }}
|
||||
@@ -115,11 +101,6 @@ jobs:
|
||||
- name: Clean up Working Directory
|
||||
run: |
|
||||
sudo rm -rf ${{github.workspace}}/* || true
|
||||
|
||||
# clean up containers use ports
|
||||
cid=$(docker ps --format '{{.Names}} : {{.Ports}}' | grep -v ' : $' | grep -v 5000 | awk -F' : ' '{print $1}')
|
||||
if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi
|
||||
|
||||
docker system prune -f
|
||||
docker rmi $(docker images --filter reference="*/*/*:latest" -q) || true
|
||||
docker rmi $(docker images --filter reference="*/*:ci" -q) || true
|
||||
@@ -145,32 +126,22 @@ jobs:
|
||||
shell: bash
|
||||
env:
|
||||
HUGGINGFACEHUB_API_TOKEN: ${{ secrets.HUGGINGFACEHUB_API_TOKEN }}
|
||||
HF_TOKEN: ${{ secrets.HUGGINGFACEHUB_API_TOKEN }}
|
||||
GOOGLE_CSE_ID: ${{ secrets.GOOGLE_CSE_ID }}
|
||||
GOOGLE_API_KEY: ${{ secrets.GOOGLE_API_KEY }}
|
||||
PINECONE_KEY: ${{ secrets.PINECONE_KEY }}
|
||||
PINECONE_KEY_LANGCHAIN_TEST: ${{ secrets.PINECONE_KEY_LANGCHAIN_TEST }}
|
||||
SDK_BASE_URL: ${{ secrets.SDK_BASE_URL }}
|
||||
SERVING_TOKEN: ${{ secrets.SERVING_TOKEN }}
|
||||
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
|
||||
IMAGE_REPO: ${{ inputs.registry }}
|
||||
IMAGE_TAG: ${{ inputs.tag }}
|
||||
opea_branch: "refactor_comps"
|
||||
example: ${{ inputs.example }}
|
||||
hardware: ${{ inputs.hardware }}
|
||||
test_case: ${{ matrix.test_case }}
|
||||
use_model_cache: ${{ inputs.use_model_cache }}
|
||||
run: |
|
||||
cd ${{ github.workspace }}/$example/tests
|
||||
if [[ "$IMAGE_REPO" == "" ]]; then export IMAGE_REPO="${OPEA_IMAGE_REPO}opea"; fi
|
||||
if [[ "$use_model_cache" == "true" ]]; then
|
||||
if [ -d "/data2/hf_model" ]; then
|
||||
export model_cache="/data2/hf_model"
|
||||
else
|
||||
echo "Model cache directory /data2/hf_model does not exist"
|
||||
export model_cache="~/.cache/huggingface/hub"
|
||||
fi
|
||||
fi
|
||||
if [ -f "${test_case}" ]; then timeout 60m bash "${test_case}"; else echo "Test script {${test_case}} not found, skip test!"; fi
|
||||
if [ -f ${test_case} ]; then timeout 30m bash ${test_case}; else echo "Test script {${test_case}} not found, skip test!"; fi
|
||||
|
||||
- name: Clean up container after test
|
||||
shell: bash
|
||||
@@ -180,11 +151,6 @@ jobs:
|
||||
export test_case=${{ matrix.test_case }}
|
||||
export hardware=${{ inputs.hardware }}
|
||||
bash ${{ github.workspace }}/.github/workflows/scripts/docker_compose_clean_up.sh "containers"
|
||||
|
||||
# clean up containers use ports
|
||||
cid=$(docker ps --format '{{.Names}} : {{.Ports}}' | grep -v ' : $' | grep -v 5000 | awk -F' : ' '{print $1}')
|
||||
if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi
|
||||
|
||||
docker system prune -f
|
||||
docker rmi $(docker images --filter reference="*:5000/*/*" -q) || true
|
||||
|
||||
|
||||
2
.github/workflows/check-online-doc-build.yml
vendored
2
.github/workflows/check-online-doc-build.yml
vendored
@@ -13,7 +13,7 @@ on:
|
||||
|
||||
jobs:
|
||||
build:
|
||||
runs-on: ubuntu-22.04
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
|
||||
- name: Checkout
|
||||
|
||||
28
.github/workflows/daily_check_issue_and_pr.yml
vendored
28
.github/workflows/daily_check_issue_and_pr.yml
vendored
@@ -1,28 +0,0 @@
|
||||
# Copyright (C) 2025 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
name: Check stale issue and pr
|
||||
|
||||
on:
|
||||
schedule:
|
||||
- cron: "30 22 * * *"
|
||||
|
||||
jobs:
|
||||
close-issues:
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
issues: write
|
||||
pull-requests: write
|
||||
steps:
|
||||
- uses: actions/stale@v9
|
||||
with:
|
||||
days-before-issue-stale: 30
|
||||
days-before-pr-stale: 30
|
||||
days-before-issue-close: 7
|
||||
days-before-pr-close: 7
|
||||
stale-issue-message: "This issue is stale because it has been open 30 days with no activity. Remove stale label or comment or this will be closed in 7 days."
|
||||
stale-pr-message: "This PR is stale because it has been open 30 days with no activity. Remove stale label or comment or this will be closed in 7 days."
|
||||
close-issue-message: "This issue was closed because it has been stalled for 7 days with no activity."
|
||||
close-pr-message: "This PR was closed because it has been stalled for 7 days with no activity."
|
||||
repo-token: ${{ secrets.ACTION_TOKEN }}
|
||||
start-date: "2025-03-01T00:00:00Z"
|
||||
984
.github/workflows/dockerhub-description.yml
vendored
984
.github/workflows/dockerhub-description.yml
vendored
@@ -1,984 +0,0 @@
|
||||
# Copyright (C) 2025 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
name: Update Docker Hub Description
|
||||
on:
|
||||
schedule:
|
||||
- cron: "0 0 * * 0"
|
||||
workflow_dispatch:
|
||||
|
||||
jobs:
|
||||
dockerHubDescription:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Checkout current repository
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Description for audioqna
|
||||
uses: peter-evans/dockerhub-description@v4
|
||||
with:
|
||||
username: ${{ secrets.DOCKERHUB_USER }}
|
||||
password: ${{ secrets.DOCKERHUB_TOKEN }}
|
||||
repository: opea/audioqna
|
||||
short-description: "The docker image served as an audioqna gateway and using language modeling to generate answers to user queries by converting audio input to text, and then using text-to-speech (TTS) to convert those answers back to speech for interaction."
|
||||
readme-filepath: AudioQnA/README.md
|
||||
enable-url-completion: true
|
||||
|
||||
- name: Description for audioqna-ui
|
||||
uses: peter-evans/dockerhub-description@v4
|
||||
with:
|
||||
username: ${{ secrets.DOCKERHUB_USER }}
|
||||
password: ${{ secrets.DOCKERHUB_TOKEN }}
|
||||
repository: opea/audioqna-ui
|
||||
short-description: "The docker image acted as the audioqna UI entry for enabling seamless interaction with users"
|
||||
readme-filepath: AudioQnA/ui/svelte/README.md
|
||||
enable-url-completion: false
|
||||
|
||||
- name: Description for audioqna-multilang
|
||||
uses: peter-evans/dockerhub-description@v4
|
||||
with:
|
||||
username: ${{ secrets.DOCKERHUB_USER }}
|
||||
password: ${{ secrets.DOCKERHUB_TOKEN }}
|
||||
repository: opea/audioqna-multilang
|
||||
short-description: "The docker image served as an audioqna gateway and using language modeling to generate answers to user queries by converting multilingual audio input to text, and then use multilingual text-to-speech (TTS) to convert those answers back to speech for interaction."
|
||||
readme-filepath: AudioQnA/README.md
|
||||
enable-url-completion: false
|
||||
|
||||
- name: Description for avatarchatbot
|
||||
uses: peter-evans/dockerhub-description@v4
|
||||
with:
|
||||
username: ${{ secrets.DOCKERHUB_USER }}
|
||||
password: ${{ secrets.DOCKERHUB_TOKEN }}
|
||||
repository: opea/avatarchatbot
|
||||
short-description: "The docker image served as a avatarchatbot gateway and interacted with users by understanding their questions and providing relevant answers."
|
||||
readme-filepath: AvatarChatbot/README.md
|
||||
enable-url-completion: false
|
||||
|
||||
- name: Description for chatqna
|
||||
uses: peter-evans/dockerhub-description@v4
|
||||
with:
|
||||
username: ${{ secrets.DOCKERHUB_USER }}
|
||||
password: ${{ secrets.DOCKERHUB_TOKEN }}
|
||||
repository: opea/chatqna
|
||||
short-description: "The docker image served as a chatqna gateway and interacted with users by understanding their questions and providing relevant answers."
|
||||
readme-filepath: ChatQnA/README.md
|
||||
enable-url-completion: false
|
||||
|
||||
- name: Description for chatqna-ui
|
||||
uses: peter-evans/dockerhub-description@v4
|
||||
with:
|
||||
username: ${{ secrets.DOCKERHUB_USER }}
|
||||
password: ${{ secrets.DOCKERHUB_TOKEN }}
|
||||
repository: opea/chatqna-ui
|
||||
short-description: "The docker image acted as the chatqna UI entry for facilitating interaction with users for question answering"
|
||||
readme-filepath: ChatQnA/ui/svelte/README.md
|
||||
enable-url-completion: false
|
||||
|
||||
- name: Description for chatqna-conversation-ui
|
||||
uses: peter-evans/dockerhub-description@v4
|
||||
with:
|
||||
username: ${{ secrets.DOCKERHUB_USER }}
|
||||
password: ${{ secrets.DOCKERHUB_TOKEN }}
|
||||
repository: opea/chatqna-conversation-ui
|
||||
short-description: "The purpose of the docker image is to provide a user interface for chat-based Q&A using React. It allows for interaction with users and supports continuing conversations with a history that is stored in the browser's local storage."
|
||||
readme-filepath: ChatQnA/ui/react/README.md
|
||||
enable-url-completion: false
|
||||
|
||||
- name: Description for codegen
|
||||
uses: peter-evans/dockerhub-description@v4
|
||||
with:
|
||||
username: ${{ secrets.DOCKERHUB_USER }}
|
||||
password: ${{ secrets.DOCKERHUB_TOKEN }}
|
||||
repository: opea/codegen
|
||||
short-description: "The docker image served as the codegen gateway to provide service of the automatic creation of source code from a higher-level representation"
|
||||
readme-filepath: CodeGen/README.md
|
||||
enable-url-completion: false
|
||||
|
||||
- name: Description for codegen-ui
|
||||
uses: peter-evans/dockerhub-description@v4
|
||||
with:
|
||||
username: ${{ secrets.DOCKERHUB_USER }}
|
||||
password: ${{ secrets.DOCKERHUB_TOKEN }}
|
||||
repository: opea/codegen-ui
|
||||
short-description: "The docker image acted as the codegen UI entry for facilitating interaction with users for automatically generating code from user's description"
|
||||
readme-filepath: CodeGen/ui/svelte/README.md
|
||||
enable-url-completion: false
|
||||
|
||||
- name: Description for codegen-react-ui
|
||||
uses: peter-evans/dockerhub-description@v4
|
||||
with:
|
||||
username: ${{ secrets.DOCKERHUB_USER }}
|
||||
password: ${{ secrets.DOCKERHUB_TOKEN }}
|
||||
repository: opea/codegen-react-ui
|
||||
short-description: "The purpose of the docker image is to provide a user interface for Codegen using React. It allows generating the appropriate code based on the current user input."
|
||||
readme-filepath: CodeGen/ui/react/README.md
|
||||
enable-url-completion: false
|
||||
|
||||
- name: Description for codetrans
|
||||
uses: peter-evans/dockerhub-description@v4
|
||||
with:
|
||||
username: ${{ secrets.DOCKERHUB_USER }}
|
||||
password: ${{ secrets.DOCKERHUB_TOKEN }}
|
||||
repository: opea/codetrans
|
||||
short-description: "The docker image served as a codetrans gateway to provide service of converting source code written in one programming language into an equivalent version in another programming language"
|
||||
readme-filepath: CodeTrans/README.md
|
||||
enable-url-completion: false
|
||||
|
||||
- name: Description for codetrans-ui
|
||||
uses: peter-evans/dockerhub-description@v4
|
||||
with:
|
||||
username: ${{ secrets.DOCKERHUB_USER }}
|
||||
password: ${{ secrets.DOCKERHUB_TOKEN }}
|
||||
repository: opea/codetrans-ui
|
||||
short-description: "The docker image acted as the codetrans UI entry for facilitating interaction with users for translating one programming language to another one"
|
||||
readme-filepath: CodeTrans/ui/svelte/README.md
|
||||
enable-url-completion: false
|
||||
|
||||
- name: Description for doc-index-retriever
|
||||
uses: peter-evans/dockerhub-description@v4
|
||||
with:
|
||||
username: ${{ secrets.DOCKERHUB_USER }}
|
||||
password: ${{ secrets.DOCKERHUB_TOKEN }}
|
||||
repository: opea/doc-index-retriever
|
||||
short-description: "The docker image acts as a DocRetriever gateway, It uses different methods to match user queries with a set of free text records."
|
||||
readme-filepath: DocIndexRetriever/README.md
|
||||
enable-url-completion: false
|
||||
|
||||
- name: Description for docsum
|
||||
uses: peter-evans/dockerhub-description@v4
|
||||
with:
|
||||
username: ${{ secrets.DOCKERHUB_USER }}
|
||||
password: ${{ secrets.DOCKERHUB_TOKEN }}
|
||||
repository: opea/docsum
|
||||
short-description: "The docker image served as a docsum gateway to provide service of capturing the main points and essential details of the original text"
|
||||
readme-filepath: Docsum/README.md
|
||||
enable-url-completion: false
|
||||
|
||||
- name: Description for docsum-ui
|
||||
uses: peter-evans/dockerhub-description@v4
|
||||
with:
|
||||
username: ${{ secrets.DOCKERHUB_USER }}
|
||||
password: ${{ secrets.DOCKERHUB_TOKEN }}
|
||||
repository: opea/docsum-ui
|
||||
short-description: "The docker image acted as the docsum UI entry for facilitating interaction with users for document summarization"
|
||||
readme-filepath: Docsum/ui/svelte/README.md
|
||||
enable-url-completion: false
|
||||
|
||||
- name: Description for docsum-react-ui
|
||||
uses: peter-evans/dockerhub-description@v4
|
||||
with:
|
||||
username: ${{ secrets.DOCKERHUB_USER }}
|
||||
password: ${{ secrets.DOCKERHUB_TOKEN }}
|
||||
repository: opea/docsum-react-ui
|
||||
short-description: "The purpose of the docker image is to provide a user interface for document summary using React. It allows upload a file or paste text and then click on “Generate Summary” to get a condensed summary of the generated content and automatically scroll to the bottom of the summary."
|
||||
readme-filepath: Docsum/ui/react/README.md
|
||||
enable-url-completion: false
|
||||
|
||||
- name: Description for docsum-gradio-ui
|
||||
uses: peter-evans/dockerhub-description@v4
|
||||
with:
|
||||
username: ${{ secrets.DOCKERHUB_USER }}
|
||||
password: ${{ secrets.DOCKERHUB_TOKEN }}
|
||||
repository: opea/docsum-gradio-ui
|
||||
short-description: "The purpose of the docker image is to provides a user interface for summarizing documents and text using a Dockerized frontend application. Users can upload files or paste text to generate summaries."
|
||||
readme-filepath: Docsum/ui/gradio/README.md
|
||||
enable-url-completion: false
|
||||
|
||||
- name: Description for edgecraftrag
|
||||
uses: peter-evans/dockerhub-description@v4
|
||||
with:
|
||||
username: ${{ secrets.DOCKERHUB_USER }}
|
||||
password: ${{ secrets.DOCKERHUB_TOKEN }}
|
||||
repository: opea/edgecraftrag
|
||||
short-description: "The docker image served as an Edge Craft RAG (EC-RAG) gateway, delivering a customizable and production-ready Retrieval-Augmented Generation system optimized for edge solutions."
|
||||
readme-filepath: EdgeCraftRag/README.md
|
||||
enable-url-completion: false
|
||||
|
||||
- name: Description for edgecraftrag-ui
|
||||
uses: peter-evans/dockerhub-description@v4
|
||||
with:
|
||||
username: ${{ secrets.DOCKERHUB_USER }}
|
||||
password: ${{ secrets.DOCKERHUB_TOKEN }}
|
||||
repository: opea/edgecraftrag-ui
|
||||
short-description: "The docker image acted as the Edge Craft RAG (EC-RAG) UI entry. It ensuring high-quality, performant interactions tailored for edge environments."
|
||||
readme-filepath: EdgeCraftRag/README.md
|
||||
enable-url-completion: false
|
||||
|
||||
- name: Description for edgecraftrag-server
|
||||
uses: peter-evans/dockerhub-description@v4
|
||||
with:
|
||||
username: ${{ secrets.DOCKERHUB_USER }}
|
||||
password: ${{ secrets.DOCKERHUB_TOKEN }}
|
||||
repository: opea/edgecraftrag-server
|
||||
short-description: "The docker image served as an Edge Craft RAG (EC-RAG) server, delivering a customizable and production-ready Retrieval-Augmented Generation system optimized for edge solutions."
|
||||
readme-filepath: EdgeCraftRag/README.md
|
||||
enable-url-completion: false
|
||||
|
||||
- name: Description for graphrag
|
||||
uses: peter-evans/dockerhub-description@v4
|
||||
with:
|
||||
username: ${{ secrets.DOCKERHUB_USER }}
|
||||
password: ${{ secrets.DOCKERHUB_TOKEN }}
|
||||
repository: opea/graphrag
|
||||
short-description: "The docker image served as a GraphRAG gateway, leveraging a knowledge graph derived from source documents to address both local and global queries."
|
||||
readme-filepath: GraphRag/README.md
|
||||
enable-url-completion: false
|
||||
|
||||
- name: Description for graphrag-ui
|
||||
uses: peter-evans/dockerhub-description@v4
|
||||
with:
|
||||
username: ${{ secrets.DOCKERHUB_USER }}
|
||||
password: ${{ secrets.DOCKERHUB_TOKEN }}
|
||||
repository: opea/graphrag-ui
|
||||
short-description: "The docker image acted as the GraphRAG UI entry for facilitating interaction with users"
|
||||
readme-filepath: GraphRag/ui/svelte/README.md
|
||||
enable-url-completion: false
|
||||
|
||||
- name: Description for graphrag-react-ui
|
||||
uses: peter-evans/dockerhub-description@v4
|
||||
with:
|
||||
username: ${{ secrets.DOCKERHUB_USER }}
|
||||
password: ${{ secrets.DOCKERHUB_TOKEN }}
|
||||
repository: opea/graphrag-react-ui
|
||||
short-description: "The purpose of the docker image is to provide a user interface for GraphRAG using React."
|
||||
readme-filepath: GraphRag/ui/react/README.md
|
||||
enable-url-completion: false
|
||||
|
||||
- name: Description for multimodalqna
|
||||
uses: peter-evans/dockerhub-description@v4
|
||||
with:
|
||||
username: ${{ secrets.DOCKERHUB_USER }}
|
||||
password: ${{ secrets.DOCKERHUB_TOKEN }}
|
||||
repository: opea/multimodalqna
|
||||
short-description: "The docker image served as a multimodalqna gateway and dynamically fetches the most relevant multimodal information (frames, transcripts, and/or subtitles) from the user's video collection to solve the problem."
|
||||
readme-filepath: MultimodalQnA/README.md
|
||||
enable-url-completion: false
|
||||
|
||||
- name: Description for multimodalqna-ui
|
||||
uses: peter-evans/dockerhub-description@v4
|
||||
with:
|
||||
username: ${{ secrets.DOCKERHUB_USER }}
|
||||
password: ${{ secrets.DOCKERHUB_TOKEN }}
|
||||
repository: opea/multimodalqna-ui
|
||||
short-description: "The docker image serves as the multimodalqna UI entry point for easy interaction with users. Answers to questions are generated from videos uploaded by users."
|
||||
readme-filepath: MultimodalQnA/README.md
|
||||
enable-url-completion: false
|
||||
|
||||
- name: Description for productivity-suite-react-ui-server
|
||||
uses: peter-evans/dockerhub-description@v4
|
||||
with:
|
||||
username: ${{ secrets.DOCKERHUB_USER }}
|
||||
password: ${{ secrets.DOCKERHUB_TOKEN }}
|
||||
repository: opea/productivity-suite-react-ui-server
|
||||
short-description: "The purpose of the docker image is to provide a user interface for Productivity Suite Application using React. It allows interaction by uploading documents and inputs."
|
||||
readme-filepath: ProductivitySuite/ui/react/README.md
|
||||
enable-url-completion: false
|
||||
|
||||
- name: Description for searchqna
|
||||
uses: peter-evans/dockerhub-description@v4
|
||||
with:
|
||||
username: ${{ secrets.DOCKERHUB_USER }}
|
||||
password: ${{ secrets.DOCKERHUB_TOKEN }}
|
||||
repository: opea/searchqna
|
||||
short-description: "The docker image served as the searchqna gateway to provide service of retrieving accurate and relevant answers to user queries from a knowledge base or dataset"
|
||||
readme-filepath: SearchQnA/README.md
|
||||
enable-url-completion: false
|
||||
|
||||
- name: Description for searchqna-ui
|
||||
uses: peter-evans/dockerhub-description@v4
|
||||
with:
|
||||
username: ${{ secrets.DOCKERHUB_USER }}
|
||||
password: ${{ secrets.DOCKERHUB_TOKEN }}
|
||||
repository: opea/searchqna-ui
|
||||
short-description: "The docker image acted as the searchqna UI entry for facilitating interaction with users for question answering"
|
||||
readme-filepath: SearchQnA/ui/svelte/README.md
|
||||
enable-url-completion: false
|
||||
|
||||
- name: Description for translation
|
||||
uses: peter-evans/dockerhub-description@v4
|
||||
with:
|
||||
username: ${{ secrets.DOCKERHUB_USER }}
|
||||
password: ${{ secrets.DOCKERHUB_TOKEN }}
|
||||
repository: opea/translation
|
||||
short-description: "The docker image served as the translation gateway to provide service of language translation"
|
||||
readme-filepath: Translation/README.md
|
||||
enable-url-completion: false
|
||||
|
||||
- name: Description for translation-ui
|
||||
uses: peter-evans/dockerhub-description@v4
|
||||
with:
|
||||
username: ${{ secrets.DOCKERHUB_USER }}
|
||||
password: ${{ secrets.DOCKERHUB_TOKEN }}
|
||||
repository: opea/translation-ui
|
||||
short-description: "The docker image acted as the translation UI entry for facilitating interaction with users for language translation"
|
||||
readme-filepath: Translation/ui/svelte/README.md
|
||||
enable-url-completion: false
|
||||
|
||||
- name: Description for videoqna
|
||||
uses: peter-evans/dockerhub-description@v4
|
||||
with:
|
||||
username: ${{ secrets.DOCKERHUB_USER }}
|
||||
password: ${{ secrets.DOCKERHUB_TOKEN }}
|
||||
repository: opea/videoqna
|
||||
short-description: "The docker image acts as videoqna gateway, interacting with the user by retrieving videos based on user prompts"
|
||||
readme-filepath: VideoQnA/README.md
|
||||
enable-url-completion: false
|
||||
|
||||
- name: Description for videoqna-ui
|
||||
uses: peter-evans/dockerhub-description@v4
|
||||
with:
|
||||
username: ${{ secrets.DOCKERHUB_USER }}
|
||||
password: ${{ secrets.DOCKERHUB_TOKEN }}
|
||||
repository: opea/videoqna-ui
|
||||
short-description: "The docker image serves as the user interface entry point for the videoqna, facilitating interaction with the user and retrieving the video based on user prompts."
|
||||
readme-filepath: VideoQnA/README.md
|
||||
enable-url-completion: false
|
||||
|
||||
- name: Description for visualqna
|
||||
uses: peter-evans/dockerhub-description@v4
|
||||
with:
|
||||
username: ${{ secrets.DOCKERHUB_USER }}
|
||||
password: ${{ secrets.DOCKERHUB_TOKEN }}
|
||||
repository: opea/visualqna
|
||||
short-description: "The docker image acts as a videoqna gateway, outputting answers in natural language based on a combination of images and questions"
|
||||
readme-filepath: VisualQnA/README.md
|
||||
enable-url-completion: false
|
||||
|
||||
- name: Description for visualqna-ui
|
||||
uses: peter-evans/dockerhub-description@v4
|
||||
with:
|
||||
username: ${{ secrets.DOCKERHUB_USER }}
|
||||
password: ${{ secrets.DOCKERHUB_TOKEN }}
|
||||
repository: opea/visualqna-ui
|
||||
short-description: "The docker image serves as the user interface portal for VisualQnA, facilitating interaction with the user and outputting answers in natural language based on a combination of images and questions from the user."
|
||||
readme-filepath: VisualQnA/README.md
|
||||
enable-url-completion: false
|
||||
|
||||
- name: Description for
|
||||
uses: peter-evans/dockerhub-description@v4
|
||||
with:
|
||||
username: ${{ secrets.DOCKERHUB_USER }}
|
||||
password: ${{ secrets.DOCKERHUB_TOKEN }}
|
||||
repository: opea/agent-ui
|
||||
short-description: "The docker image exposed the OPEA agent microservice UI entry for GenAI application use."
|
||||
readme-filepath: AgentQnA/README.md
|
||||
enable-url-completion: false
|
||||
|
||||
- name: Description for text2image-ui
|
||||
uses: peter-evans/dockerhub-description@v4
|
||||
with:
|
||||
username: ${{ secrets.DOCKERHUB_USER }}
|
||||
password: ${{ secrets.DOCKERHUB_TOKEN }}
|
||||
repository: opea/text2image-ui
|
||||
short-description: "The docker image exposed the OPEA text-to-image microservice UI entry for GenAI application use."
|
||||
readme-filepath: Text2Image/README.md
|
||||
enable-url-completion: false
|
||||
|
||||
- name: Description for text2sql-react-ui
|
||||
uses: peter-evans/dockerhub-description@v4
|
||||
with:
|
||||
username: ${{ secrets.DOCKERHUB_USER }}
|
||||
password: ${{ secrets.DOCKERHUB_TOKEN }}
|
||||
repository: opea/text2sql-react-ui
|
||||
short-description: "The docker image exposed the OPEA text to Structured Query Language microservice react UI entry for GenAI application use."
|
||||
readme-filepath: DBQnA/README.md
|
||||
enable-url-completion: false
|
||||
|
||||
- name: Checkout GenAIComps
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
repository: opea-project/GenAIComps
|
||||
path: GenAIComps
|
||||
|
||||
- name: Description for agent
|
||||
uses: peter-evans/dockerhub-description@v4
|
||||
with:
|
||||
username: ${{ secrets.DOCKERHUB_USER }}
|
||||
password: ${{ secrets.DOCKERHUB_TOKEN }}
|
||||
repository: opea/agent
|
||||
short-description: "The docker image exposed the OPEA agent microservice for GenAI application use."
|
||||
readme-filepath: GenAIComps/comps/agent/src/README.md
|
||||
enable-url-completion: false
|
||||
|
||||
- name: Description for
|
||||
uses: peter-evans/dockerhub-description@v4
|
||||
with:
|
||||
username: ${{ secrets.DOCKERHUB_USER }}
|
||||
password: ${{ secrets.DOCKERHUB_TOKEN }}
|
||||
repository: opea/asr
|
||||
short-description: "The docker image exposed the OPEA Audio-Speech-Recognition microservice for GenAI application use."
|
||||
readme-filepath: GenAIComps/comps/asr/src/README.md
|
||||
enable-url-completion: false
|
||||
|
||||
- name: Description for
|
||||
uses: peter-evans/dockerhub-description@v4
|
||||
with:
|
||||
username: ${{ secrets.DOCKERHUB_USER }}
|
||||
password: ${{ secrets.DOCKERHUB_TOKEN }}
|
||||
repository: opea/animation
|
||||
short-description: "The purpose of the Docker image is to expose the OPEA Avatar Animation microservice for GenAI application use."
|
||||
readme-filepath: GenAIComps/comps/animation/src/README.md
|
||||
enable-url-completion: false
|
||||
|
||||
- name: Description for
|
||||
uses: peter-evans/dockerhub-description@v4
|
||||
with:
|
||||
username: ${{ secrets.DOCKERHUB_USER }}
|
||||
password: ${{ secrets.DOCKERHUB_TOKEN }}
|
||||
repository: opea/chathistory-mongo
|
||||
short-description: "The docker image exposes OPEA Chat History microservice which based on MongoDB database, designed to allow user to store, retrieve and manage chat conversations"
|
||||
readme-filepath: GenAIComps/comps/chathistory/src/README.md
|
||||
enable-url-completion: false
|
||||
|
||||
- name: Description for
|
||||
uses: peter-evans/dockerhub-description@v4
|
||||
with:
|
||||
username: ${{ secrets.DOCKERHUB_USER }}
|
||||
password: ${{ secrets.DOCKERHUB_TOKEN }}
|
||||
repository: opea/dataprep
|
||||
short-description: "The docker image exposed the OPEA dataprep microservice for GenAI application use."
|
||||
readme-filepath: GenAIComps/comps/dataprep/README.md
|
||||
enable-url-completion: false
|
||||
|
||||
- name: Description for
|
||||
uses: peter-evans/dockerhub-description@v4
|
||||
with:
|
||||
username: ${{ secrets.DOCKERHUB_USER }}
|
||||
password: ${{ secrets.DOCKERHUB_TOKEN }}
|
||||
repository: opea/embedding
|
||||
short-description: "The docker image exposed the OPEA mosec embedding microservice for GenAI application use."
|
||||
readme-filepath: GenAIComps/comps/embeddings/src/README.md
|
||||
enable-url-completion: false
|
||||
|
||||
- name: Description for
|
||||
uses: peter-evans/dockerhub-description@v4
|
||||
with:
|
||||
username: ${{ secrets.DOCKERHUB_USER }}
|
||||
password: ${{ secrets.DOCKERHUB_TOKEN }}
|
||||
repository: opea/embedding-multimodal-clip
|
||||
short-description: "The docker image exposed the OPEA mosec embedding microservice base on Langchain framework for GenAI application use."
|
||||
readme-filepath: GenAIComps/comps/third_parties/clip/src/README.md
|
||||
enable-url-completion: false
|
||||
|
||||
- name: Description for
|
||||
uses: peter-evans/dockerhub-description@v4
|
||||
with:
|
||||
username: ${{ secrets.DOCKERHUB_USER }}
|
||||
password: ${{ secrets.DOCKERHUB_TOKEN }}
|
||||
repository: opea/embedding-multimodal-bridgetower
|
||||
short-description: "The docker image exposes OPEA multimodal embedded microservices based on bridgetower for use by GenAI applications."
|
||||
readme-filepath: GenAIComps/comps/third_parties/bridgetower/src/README.md
|
||||
enable-url-completion: false
|
||||
|
||||
- name: Description for
|
||||
uses: peter-evans/dockerhub-description@v4
|
||||
with:
|
||||
username: ${{ secrets.DOCKERHUB_USER }}
|
||||
password: ${{ secrets.DOCKERHUB_TOKEN }}
|
||||
repository: opea/embedding-multimodal-bridgetower-gaudi
|
||||
short-description: "The docker image exposes OPEA multimodal embedded microservices based on bridgetower for use by GenAI applications on the Gaudi."
|
||||
readme-filepath: GenAIComps/comps/third_parties/bridgetower/src/README.md
|
||||
enable-url-completion: false
|
||||
|
||||
- name: Description for
|
||||
uses: peter-evans/dockerhub-description@v4
|
||||
with:
|
||||
username: ${{ secrets.DOCKERHUB_USER }}
|
||||
password: ${{ secrets.DOCKERHUB_TOKEN }}
|
||||
repository: opea/feedbackmanagement-mongo
|
||||
short-description: "The docker image exposes that the OPEA feedback management microservice uses a MongoDB database for GenAI applications."
|
||||
readme-filepath: GenAIComps/comps/feedback_management/src/README.md
|
||||
enable-url-completion: false
|
||||
|
||||
- name: Description for
|
||||
uses: peter-evans/dockerhub-description@v4
|
||||
with:
|
||||
username: ${{ secrets.DOCKERHUB_USER }}
|
||||
password: ${{ secrets.DOCKERHUB_TOKEN }}
|
||||
repository: opea/finetuning
|
||||
short-description: "The docker image exposed the OPEA Fine-tuning microservice for GenAI application use."
|
||||
readme-filepath: GenAIComps/comps/finetuning/src/README.md
|
||||
enable-url-completion: false
|
||||
|
||||
- name: Description for
|
||||
uses: peter-evans/dockerhub-description@v4
|
||||
with:
|
||||
username: ${{ secrets.DOCKERHUB_USER }}
|
||||
password: ${{ secrets.DOCKERHUB_TOKEN }}
|
||||
repository: opea/finetuning-gaudi
|
||||
short-description: "The docker image exposed the OPEA Fine-tuning microservice for GenAI application use on the Gaudi."
|
||||
readme-filepath: GenAIComps/comps/finetuning/src/README.md
|
||||
enable-url-completion: false
|
||||
|
||||
- name: Description for
|
||||
uses: peter-evans/dockerhub-description@v4
|
||||
with:
|
||||
username: ${{ secrets.DOCKERHUB_USER }}
|
||||
password: ${{ secrets.DOCKERHUB_TOKEN }}
|
||||
repository: opea/gpt-sovits
|
||||
short-description: "The docker image exposed the OPEA GPT-SoVITS service for GenAI application use."
|
||||
readme-filepath: GenAIComps/comps/tts/src/integrations/dependency/gpt-sovits/README.md
|
||||
enable-url-completion: false
|
||||
|
||||
- name: Description for
|
||||
uses: peter-evans/dockerhub-description@v4
|
||||
with:
|
||||
username: ${{ secrets.DOCKERHUB_USER }}
|
||||
password: ${{ secrets.DOCKERHUB_TOKEN }}
|
||||
repository: opea/guardrails
|
||||
short-description: "The docker image exposed the OPEA guardrail microservice for GenAI application use."
|
||||
readme-filepath: GenAIComps/comps/guardrails/src/guardrails/README.md
|
||||
enable-url-completion: false
|
||||
|
||||
- name: Description for
|
||||
uses: peter-evans/dockerhub-description@v4
|
||||
with:
|
||||
username: ${{ secrets.DOCKERHUB_USER }}
|
||||
password: ${{ secrets.DOCKERHUB_TOKEN }}
|
||||
repository: opea/guardrails-toxicity-predictionguard
|
||||
short-description: "The docker image exposed the OPEA guardrail microservice to provide toxicity detection for GenAI application use."
|
||||
readme-filepath: GenAIComps/comps/guardrails/src/toxicity_detection/README.md
|
||||
enable-url-completion: false
|
||||
|
||||
- name: Description for
|
||||
uses: peter-evans/dockerhub-description@v4
|
||||
with:
|
||||
username: ${{ secrets.DOCKERHUB_USER }}
|
||||
password: ${{ secrets.DOCKERHUB_TOKEN }}
|
||||
repository: opea/guardrails-pii-predictionguard
|
||||
short-description: "The docker image exposed the OPEA guardrail microservice to provide PII detection for GenAI application use."
|
||||
readme-filepath: GenAIComps/comps/guardrails/src/pii_detection/README.md
|
||||
enable-url-completion: false
|
||||
|
||||
- name: Description for
|
||||
uses: peter-evans/dockerhub-description@v4
|
||||
with:
|
||||
username: ${{ secrets.DOCKERHUB_USER }}
|
||||
password: ${{ secrets.DOCKERHUB_TOKEN }}
|
||||
repository: opea/guardrails-injection-predictionguard
|
||||
short-description: "The docker image exposed the OPEA guardrail microservice to provide injection predictionguard for GenAI application use."
|
||||
readme-filepath: GenAIComps/comps/guardrails/src/prompt_injection/README.md
|
||||
enable-url-completion: false
|
||||
|
||||
- name: Description for guardrails-hallucination-detection
|
||||
uses: peter-evans/dockerhub-description@v4
|
||||
with:
|
||||
username: ${{ secrets.DOCKERHUB_USER }}
|
||||
password: ${{ secrets.DOCKERHUB_TOKEN }}
|
||||
repository: opea/guardrails-hallucination-detection
|
||||
short-description: "The docker image exposed the OPEA guardrail microservice to provide hallucination detection for GenAI application use."
|
||||
readme-filepath: GenAIComps/comps/guardrails/src/hallucination_detection/README.md
|
||||
enable-url-completion: false
|
||||
|
||||
- name: Description for guardrails-factuality-predictionguard
|
||||
uses: peter-evans/dockerhub-description@v4
|
||||
with:
|
||||
username: ${{ secrets.DOCKERHUB_USER }}
|
||||
password: ${{ secrets.DOCKERHUB_TOKEN }}
|
||||
repository: opea/guardrails-factuality-predictionguard
|
||||
short-description: "The docker image exposed the OPEA guardrail microservice to provide factuality predictionguard for GenAI application use."
|
||||
readme-filepath: GenAIComps/comps/guardrails/src/factuality_alignment/README.md
|
||||
enable-url-completion: false
|
||||
|
||||
- name: Description for guardrails-bias-detection
|
||||
uses: peter-evans/dockerhub-description@v4
|
||||
with:
|
||||
username: ${{ secrets.DOCKERHUB_USER }}
|
||||
password: ${{ secrets.DOCKERHUB_TOKEN }}
|
||||
repository: opea/guardrails-bias-detection
|
||||
short-description: "The docker image exposed the OPEA guardrail microservice to provide bias detection for GenAI application use."
|
||||
readme-filepath: GenAIComps/comps/guardrails/src/bias_detection/README.md
|
||||
enable-url-completion: false
|
||||
|
||||
- name: Description for image2image-gaudi
|
||||
uses: peter-evans/dockerhub-description@v4
|
||||
with:
|
||||
username: ${{ secrets.DOCKERHUB_USER }}
|
||||
password: ${{ secrets.DOCKERHUB_TOKEN }}
|
||||
repository: opea/image2image-gaudi
|
||||
short-description: "The purpose of the Docker image is to expose the OPEA Image-to-Image microservice for GenAI application use on the Gaudi."
|
||||
readme-filepath: GenAIComps/comps/image2image/src/README.md
|
||||
enable-url-completion: false
|
||||
|
||||
- name: Description for image2image
|
||||
uses: peter-evans/dockerhub-description@v4
|
||||
with:
|
||||
username: ${{ secrets.DOCKERHUB_USER }}
|
||||
password: ${{ secrets.DOCKERHUB_TOKEN }}
|
||||
repository: opea/image2image
|
||||
short-description: "The purpose of the Docker image is to expose the OPEA Image-to-Image microservice for GenAI application use."
|
||||
readme-filepath: GenAIComps/comps/image2image/src/README.md
|
||||
enable-url-completion: false
|
||||
|
||||
- name: Description for image2video-gaudi
|
||||
uses: peter-evans/dockerhub-description@v4
|
||||
with:
|
||||
username: ${{ secrets.DOCKERHUB_USER }}
|
||||
password: ${{ secrets.DOCKERHUB_TOKEN }}
|
||||
repository: opea/image2video-gaudi
|
||||
short-description: "The purpose of the Docker image is to expose the OPEA image-to-video microservice for GenAI application use on the Gaudi."
|
||||
readme-filepath: GenAIComps/comps/image2video/src/README.md
|
||||
enable-url-completion: false
|
||||
|
||||
- name: Description for image2video
|
||||
uses: peter-evans/dockerhub-description@v4
|
||||
with:
|
||||
username: ${{ secrets.DOCKERHUB_USER }}
|
||||
password: ${{ secrets.DOCKERHUB_TOKEN }}
|
||||
repository: opea/image2video
|
||||
short-description: "The purpose of the Docker image is to expose the OPEA image-to-video microservice for GenAI application use."
|
||||
readme-filepath: GenAIComps/comps/image2video/src/README.md
|
||||
enable-url-completion: false
|
||||
|
||||
- name: Description for llm-textgen
|
||||
uses: peter-evans/dockerhub-description@v4
|
||||
with:
|
||||
username: ${{ secrets.DOCKERHUB_USER }}
|
||||
password: ${{ secrets.DOCKERHUB_TOKEN }}
|
||||
repository: opea/llm-textgen
|
||||
short-description: "The docker image exposed the OPEA LLM microservice upon textgen docker image for GenAI application use."
|
||||
readme-filepath: GenAIComps/comps/llms/src/text-generation/README.md
|
||||
enable-url-completion: false
|
||||
|
||||
- name: Description for llm-textgen-gaudi
|
||||
uses: peter-evans/dockerhub-description@v4
|
||||
with:
|
||||
username: ${{ secrets.DOCKERHUB_USER }}
|
||||
password: ${{ secrets.DOCKERHUB_TOKEN }}
|
||||
repository: opea/llm-textgen-gaudi
|
||||
short-description: "The docker image exposed the OPEA LLM microservice upon textgen docker image for GenAI application use on the Gaudi2."
|
||||
readme-filepath: GenAIComps/comps/llms/src/text-generation/README.md
|
||||
enable-url-completion: false
|
||||
|
||||
- name: Description for llm-eval
|
||||
uses: peter-evans/dockerhub-description@v4
|
||||
with:
|
||||
username: ${{ secrets.DOCKERHUB_USER }}
|
||||
password: ${{ secrets.DOCKERHUB_TOKEN }}
|
||||
repository: opea/llm-eval
|
||||
short-description: "The docker image exposed the OPEA LLM microservice upon eval docker image for GenAI application use."
|
||||
readme-filepath: GenAIComps/comps/llms/utils/lm-eval/README.md
|
||||
enable-url-completion: false
|
||||
|
||||
- name: Description for llm-docsum
|
||||
uses: peter-evans/dockerhub-description@v4
|
||||
with:
|
||||
username: ${{ secrets.DOCKERHUB_USER }}
|
||||
password: ${{ secrets.DOCKERHUB_TOKEN }}
|
||||
repository: opea/llm-docsum
|
||||
short-description: "The docker image exposed the OPEA LLM microservice upon docsum docker image for GenAI application use."
|
||||
readme-filepath: GenAIComps/comps/llms/src/doc-summarization/README.md
|
||||
enable-url-completion: false
|
||||
|
||||
- name: Description for llm-faqgen
|
||||
uses: peter-evans/dockerhub-description@v4
|
||||
with:
|
||||
username: ${{ secrets.DOCKERHUB_USER }}
|
||||
password: ${{ secrets.DOCKERHUB_TOKEN }}
|
||||
repository: opea/llm-faqgen
|
||||
short-description: "This docker image is designed to build a frequently asked questions microservice using the HuggingFace Text Generation Inference(TGI) framework. The microservice accepts document input and generates a FAQ."
|
||||
readme-filepath: GenAIComps/comps/llms/src/faq-generation/README.md
|
||||
enable-url-completion: false
|
||||
|
||||
- name: Description for lvm
|
||||
uses: peter-evans/dockerhub-description@v4
|
||||
with:
|
||||
username: ${{ secrets.DOCKERHUB_USER }}
|
||||
password: ${{ secrets.DOCKERHUB_TOKEN }}
|
||||
repository: opea/lvm
|
||||
short-description: "The docker image exposed the OPEA large visual model (LVM) microservice for GenAI application use."
|
||||
readme-filepath: GenAIComps/comps/lvms/src/README.md
|
||||
enable-url-completion: false
|
||||
|
||||
- name: Description for lvm-llava
|
||||
uses: peter-evans/dockerhub-description@v4
|
||||
with:
|
||||
username: ${{ secrets.DOCKERHUB_USER }}
|
||||
password: ${{ secrets.DOCKERHUB_TOKEN }}
|
||||
repository: opea/lvm-llava
|
||||
short-description: "The docker image exposed the OPEA microservice running LLaVA as a large visual model (LVM) server for GenAI application use."
|
||||
readme-filepath: GenAIComps/comps/lvms/src/integrations/dependency/llava/README.md
|
||||
enable-url-completion: false
|
||||
|
||||
- name: Description for lvm-video-llama
|
||||
uses: peter-evans/dockerhub-description@v4
|
||||
with:
|
||||
username: ${{ secrets.DOCKERHUB_USER }}
|
||||
password: ${{ secrets.DOCKERHUB_TOKEN }}
|
||||
repository: opea/lvm-video-llama
|
||||
short-description: "The docker image exposed the OPEA microservice running Video-Llama as a large visual model (LVM) for GenAI application use."
|
||||
readme-filepath: GenAIComps/comps/lvms/src/integrations/dependency/video-llama/README.md
|
||||
enable-url-completion: false
|
||||
|
||||
- name: Description for lvm-predictionguard
|
||||
uses: peter-evans/dockerhub-description@v4
|
||||
with:
|
||||
username: ${{ secrets.DOCKERHUB_USER }}
|
||||
password: ${{ secrets.DOCKERHUB_TOKEN }}
|
||||
repository: opea/lvm-predictionguard
|
||||
short-description: "The docker image exposed the OPEA microservice running predictionguard as a large visual model (LVM) server for GenAI application use."
|
||||
readme-filepath: GenAIComps/comps/lvms/src/integrations/dependency/predictionguard/README.md
|
||||
enable-url-completion: false
|
||||
|
||||
- name: Description for llava-gaudi
|
||||
uses: peter-evans/dockerhub-description@v4
|
||||
with:
|
||||
username: ${{ secrets.DOCKERHUB_USER }}
|
||||
password: ${{ secrets.DOCKERHUB_TOKEN }}
|
||||
repository: opea/llava-gaudi
|
||||
short-description: "The docker image exposed the OPEA microservice running LLaVA as a large visual model (LVM) service for GenAI application use on the Gaudi2."
|
||||
readme-filepath: GenAIComps/comps/lvms/src/integrations/dependency/llava/README.md
|
||||
enable-url-completion: false
|
||||
|
||||
- name: Description for lvm-llama-vision
|
||||
uses: peter-evans/dockerhub-description@v4
|
||||
with:
|
||||
username: ${{ secrets.DOCKERHUB_USER }}
|
||||
password: ${{ secrets.DOCKERHUB_TOKEN }}
|
||||
repository: opea/lvm-llama-vision
|
||||
short-description: "The docker image exposed the OPEA microservice running Llama Vision as the base large visual model service for GenAI application use."
|
||||
readme-filepath: GenAIComps/comps/lvms/src/integrations/dependency/llama-vision/README.md
|
||||
enable-url-completion: false
|
||||
|
||||
- name: Description for lvm-llama-vision-tp
|
||||
uses: peter-evans/dockerhub-description@v4
|
||||
with:
|
||||
username: ${{ secrets.DOCKERHUB_USER }}
|
||||
password: ${{ secrets.DOCKERHUB_TOKEN }}
|
||||
repository: opea/lvm-llama-vision-tp
|
||||
short-description: "The docker image exposed the OPEA microservice running Llama Vision with deepspeed as the base large visual model service for GenAI application use."
|
||||
readme-filepath: GenAIComps/comps/lvms/src/integrations/dependency/llama-vision/README.md
|
||||
enable-url-completion: false
|
||||
|
||||
- name: Description for lvm-llama-vision-guard
|
||||
uses: peter-evans/dockerhub-description@v4
|
||||
with:
|
||||
username: ${{ secrets.DOCKERHUB_USER }}
|
||||
password: ${{ secrets.DOCKERHUB_TOKEN }}
|
||||
repository: opea/lvm-llama-vision-guard
|
||||
short-description: "The docker image exposed the OPEA microservice running Llama Vision Guard as the base large visual model service for GenAI application use."
|
||||
readme-filepath: GenAIComps/comps/lvms/src/integrations/dependency/llama-vision/README.md
|
||||
enable-url-completion: false
|
||||
|
||||
- name: Description for promptregistry-mongo
|
||||
uses: peter-evans/dockerhub-description@v4
|
||||
with:
|
||||
username: ${{ secrets.DOCKERHUB_USER }}
|
||||
password: ${{ secrets.DOCKERHUB_TOKEN }}
|
||||
repository: opea/promptregistry-mongo
|
||||
short-description: "The docker image exposes the OPEA Prompt Registry microservice, which is based on a MongoDB database and is designed to store and retrieve users' preferred prompts."
|
||||
readme-filepath: GenAIComps/comps/prompt_registry/src/README.md
|
||||
enable-url-completion: false
|
||||
|
||||
- name: Description for reranking
|
||||
uses: peter-evans/dockerhub-description@v4
|
||||
with:
|
||||
username: ${{ secrets.DOCKERHUB_USER }}
|
||||
password: ${{ secrets.DOCKERHUB_TOKEN }}
|
||||
repository: opea/reranking
|
||||
short-description: "The docker image exposed the OPEA reranking microservice for GenAI application use."
|
||||
readme-filepath: GenAIComps/comps/rerankings/src/README.md
|
||||
enable-url-completion: false
|
||||
|
||||
- name: Description for retriever
|
||||
uses: peter-evans/dockerhub-description@v4
|
||||
with:
|
||||
username: ${{ secrets.DOCKERHUB_USER }}
|
||||
password: ${{ secrets.DOCKERHUB_TOKEN }}
|
||||
repository: opea/retriever
|
||||
short-description: "The docker image exposed the OPEA retrieval microservice for GenAI application use."
|
||||
readme-filepath: GenAIComps/comps/retrievers/README.md
|
||||
enable-url-completion: false
|
||||
|
||||
- name: Description for text2image
|
||||
uses: peter-evans/dockerhub-description@v4
|
||||
with:
|
||||
username: ${{ secrets.DOCKERHUB_USER }}
|
||||
password: ${{ secrets.DOCKERHUB_TOKEN }}
|
||||
repository: opea/text2image
|
||||
short-description: "The docker image exposed the OPEA text-to-image microservice for GenAI application use."
|
||||
readme-filepath: GenAIComps/comps/text2image/src/README.md
|
||||
enable-url-completion: false
|
||||
|
||||
- name: Description for text2image-gaudi
|
||||
uses: peter-evans/dockerhub-description@v4
|
||||
with:
|
||||
username: ${{ secrets.DOCKERHUB_USER }}
|
||||
password: ${{ secrets.DOCKERHUB_TOKEN }}
|
||||
repository: opea/text2image-gaudi
|
||||
short-description: "The docker image exposed the OPEA text-to-image microservice for GenAI application use on the Gaudi."
|
||||
readme-filepath: GenAIComps/comps/text2image/src/README.md
|
||||
enable-url-completion: false
|
||||
|
||||
- name: Description for text2sql
|
||||
uses: peter-evans/dockerhub-description@v4
|
||||
with:
|
||||
username: ${{ secrets.DOCKERHUB_USER }}
|
||||
password: ${{ secrets.DOCKERHUB_TOKEN }}
|
||||
repository: opea/text2sql
|
||||
short-description: "The docker image exposed the OPEA text to Structured Query Language microservice for GenAI application use."
|
||||
readme-filepath: GenAIComps/comps/text2sql/src/README.md
|
||||
enable-url-completion: false
|
||||
|
||||
- name: Description for tts
|
||||
uses: peter-evans/dockerhub-description@v4
|
||||
with:
|
||||
username: ${{ secrets.DOCKERHUB_USER }}
|
||||
password: ${{ secrets.DOCKERHUB_TOKEN }}
|
||||
repository: opea/tts
|
||||
short-description: "The docker image exposed the OPEA Text-To-Speech microservice for GenAI application use."
|
||||
readme-filepath: GenAIComps/comps/tts/src/README.md
|
||||
enable-url-completion: false
|
||||
|
||||
- name: Description for speecht5
|
||||
uses: peter-evans/dockerhub-description@v4
|
||||
with:
|
||||
username: ${{ secrets.DOCKERHUB_USER }}
|
||||
password: ${{ secrets.DOCKERHUB_TOKEN }}
|
||||
repository: opea/speecht5
|
||||
short-description: "The docker image exposed the OPEA SpeechT5 service for GenAI application use."
|
||||
readme-filepath: GenAIComps/comps/tts/src/README.md
|
||||
enable-url-completion: false
|
||||
|
||||
- name: Description for speecht5-gaudi
|
||||
uses: peter-evans/dockerhub-description@v4
|
||||
with:
|
||||
username: ${{ secrets.DOCKERHUB_USER }}
|
||||
password: ${{ secrets.DOCKERHUB_TOKEN }}
|
||||
repository: opea/speecht5-gaudi
|
||||
short-description: "The docker image exposed the OPEA SpeechT5 service on Gaudi2 for GenAI application use."
|
||||
readme-filepath: GenAIComps/comps/tts/src/README.md
|
||||
enable-url-completion: false
|
||||
|
||||
- name: Description for gpt-sovits
|
||||
uses: peter-evans/dockerhub-description@v4
|
||||
with:
|
||||
username: ${{ secrets.DOCKERHUB_USER }}
|
||||
password: ${{ secrets.DOCKERHUB_TOKEN }}
|
||||
repository: opea/gpt-sovits
|
||||
short-description: "The docker image exposed the OPEA gpt-sovits service for GenAI application use."
|
||||
readme-filepath: GenAIComps/comps/tts/src/integrations/dependency/gpt-sovits/README.md
|
||||
enable-url-completion: false
|
||||
|
||||
- name: Description for nginx
|
||||
uses: peter-evans/dockerhub-description@v4
|
||||
with:
|
||||
username: ${{ secrets.DOCKERHUB_USER }}
|
||||
password: ${{ secrets.DOCKERHUB_TOKEN }}
|
||||
repository: opea/nginx
|
||||
short-description: "The docker image exposed the OPEA nginx microservice for GenAI application use."
|
||||
readme-filepath: GenAIComps/comps/third_parties/nginx/deployment/kubernetes/README.md
|
||||
enable-url-completion: false
|
||||
|
||||
- name: Description for vectorstore-pathway
|
||||
uses: peter-evans/dockerhub-description@v4
|
||||
with:
|
||||
username: ${{ secrets.DOCKERHUB_USER }}
|
||||
password: ${{ secrets.DOCKERHUB_TOKEN }}
|
||||
repository: opea/vectorstore-pathway
|
||||
short-description: "The docker image exposed the OPEA Vectorstores microservice with Pathway for GenAI application use."
|
||||
readme-filepath: GenAIComps/comps/third_parties/pathway/src/README.md
|
||||
enable-url-completion: false
|
||||
|
||||
- name: Description for wav2lip
|
||||
uses: peter-evans/dockerhub-description@v4
|
||||
with:
|
||||
username: ${{ secrets.DOCKERHUB_USER }}
|
||||
password: ${{ secrets.DOCKERHUB_TOKEN }}
|
||||
repository: opea/wav2lip
|
||||
short-description: "The docker image exposed the OPEA wav2lip microservice, which generates lip movements from audio files, for GenAI application use."
|
||||
readme-filepath: GenAIComps/comps/third_parties/wav2lip/deployment/kubernetes/README.md
|
||||
enable-url-completion: false
|
||||
|
||||
- name: Description for wav2lip-gaudi
|
||||
uses: peter-evans/dockerhub-description@v4
|
||||
with:
|
||||
username: ${{ secrets.DOCKERHUB_USER }}
|
||||
password: ${{ secrets.DOCKERHUB_TOKEN }}
|
||||
repository: opea/wav2lip-gaudi
|
||||
short-description: "The docker image exposed the OPEA wav2lip microservice, which generates lip movements from audio files, for GenAI application use on the Gaudi2."
|
||||
readme-filepath: GenAIComps/comps/third_parties/wav2lip/deployment/kubernetes/README.md
|
||||
enable-url-completion: false
|
||||
|
||||
- name: Description for vllm-arc
|
||||
uses: peter-evans/dockerhub-description@v4
|
||||
with:
|
||||
username: ${{ secrets.DOCKERHUB_USER }}
|
||||
password: ${{ secrets.DOCKERHUB_TOKEN }}
|
||||
repository: opea/vllm-arc
|
||||
short-description: "The docker image powered by vllm-project for deploying and serving vllm Models on Arc."
|
||||
readme-filepath: GenAIComps/comps/third_parties/vllm/README.md
|
||||
enable-url-completion: false
|
||||
|
||||
- name: Description for whisper-gaudi
|
||||
uses: peter-evans/dockerhub-description@v4
|
||||
with:
|
||||
username: ${{ secrets.DOCKERHUB_USER }}
|
||||
password: ${{ secrets.DOCKERHUB_TOKEN }}
|
||||
repository: opea/whisper-gaudi
|
||||
short-description: "The docker image exposed the OPEA Whisper service on Gaudi2 for GenAI application use."
|
||||
readme-filepath: GenAIComps/comps/asr/src/README.md
|
||||
enable-url-completion: false
|
||||
|
||||
- name: Description for whisper
|
||||
uses: peter-evans/dockerhub-description@v4
|
||||
with:
|
||||
username: ${{ secrets.DOCKERHUB_USER }}
|
||||
password: ${{ secrets.DOCKERHUB_TOKEN }}
|
||||
repository: opea/whisper
|
||||
short-description: "The docker image exposed the OPEA Whisper service for GenAI application use."
|
||||
readme-filepath: GenAIComps/comps/asr/src/README.md
|
||||
enable-url-completion: false
|
||||
|
||||
- name: Description for web-retriever
|
||||
uses: peter-evans/dockerhub-description@v4
|
||||
with:
|
||||
username: ${{ secrets.DOCKERHUB_USER }}
|
||||
password: ${{ secrets.DOCKERHUB_TOKEN }}
|
||||
repository: opea/web-retriever
|
||||
short-description: "The docker image exposed the OPEA retrieval microservice based on chroma vectordb for GenAI application use."
|
||||
readme-filepath: GenAIComps/comps/web_retrievers/src/README.md
|
||||
enable-url-completion: false
|
||||
|
||||
- name: Checkout vllm-openvino
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
repository: vllm-project/vllm
|
||||
path: vllm
|
||||
|
||||
- name: Description for vllm-openvino
|
||||
uses: peter-evans/dockerhub-description@v4
|
||||
with:
|
||||
username: ${{ secrets.DOCKERHUB_USER }}
|
||||
password: ${{ secrets.DOCKERHUB_TOKEN }}
|
||||
repository: opea/vllm-openvino
|
||||
short-description: "The docker image powered by vllm-project for deploying and serving vllm Models of the Openvino Framework."
|
||||
readme-filepath: vllm/README.md
|
||||
enable-url-completion: false
|
||||
|
||||
- name: Checkout vllm-gaudi
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
repository: HabanaAI/vllm-fork
|
||||
ref: habana_main
|
||||
path: vllm-fork
|
||||
|
||||
- name: Description for vllm-gaudi
|
||||
uses: peter-evans/dockerhub-description@v4
|
||||
with:
|
||||
username: ${{ secrets.DOCKERHUB_USER }}
|
||||
password: ${{ secrets.DOCKERHUB_TOKEN }}
|
||||
repository: opea/vllm-gaudi
|
||||
short-description: "The docker image powered by vllm-project for deploying and serving vllm Models on Gaudi2."
|
||||
readme-filepath: vllm-fork/README.md
|
||||
enable-url-completion: false
|
||||
|
||||
- name: Description for vllm
|
||||
uses: peter-evans/dockerhub-description@v4
|
||||
with:
|
||||
username: ${{ secrets.DOCKERHUB_USER }}
|
||||
password: ${{ secrets.DOCKERHUB_TOKEN }}
|
||||
repository: opea/vllm
|
||||
short-description: "The docker image powered by vllm-project for deploying and serving vllm Models."
|
||||
readme-filepath: vllm/README.md
|
||||
enable-url-completion: false
|
||||
2
.github/workflows/manual-docker-publish.yml
vendored
2
.github/workflows/manual-docker-publish.yml
vendored
@@ -41,11 +41,9 @@ jobs:
|
||||
|
||||
publish:
|
||||
needs: [get-image-list]
|
||||
if: ${{ needs.get-image-list.outputs.matrix != '' }}
|
||||
strategy:
|
||||
matrix:
|
||||
image: ${{ fromJSON(needs.get-image-list.outputs.matrix) }}
|
||||
fail-fast: false
|
||||
runs-on: "docker-build-${{ inputs.node }}"
|
||||
steps:
|
||||
- uses: docker/login-action@v3.2.0
|
||||
|
||||
3
.github/workflows/manual-docker-scan.yml
vendored
3
.github/workflows/manual-docker-scan.yml
vendored
@@ -12,7 +12,7 @@ on:
|
||||
type: string
|
||||
examples:
|
||||
default: ""
|
||||
description: 'List of examples to publish "AgentQnA,AudioQnA,ChatQnA,CodeGen,CodeTrans,DocIndexRetriever,DocSum,InstructionTuning,MultimodalQnA,ProductivitySuite,RerankFinetuning,SearchQnA,Translation,VideoQnA,VisualQnA"'
|
||||
description: 'List of examples to publish "AgentQnA,AudioQnA,ChatQnA,CodeGen,CodeTrans,DocIndexRetriever,DocSum,FaqGen,InstructionTuning,MultimodalQnA,ProductivitySuite,RerankFinetuning,SearchQnA,Translation,VideoQnA,VisualQnA"'
|
||||
required: false
|
||||
type: string
|
||||
images:
|
||||
@@ -47,7 +47,6 @@ jobs:
|
||||
scan-docker:
|
||||
needs: get-image-list
|
||||
runs-on: "docker-build-${{ inputs.node }}"
|
||||
if: ${{ needs.get-image-list.outputs.matrix != '' }}
|
||||
strategy:
|
||||
matrix:
|
||||
image: ${{ fromJson(needs.get-image-list.outputs.matrix) }}
|
||||
|
||||
42
.github/workflows/manual-example-workflow.yml
vendored
42
.github/workflows/manual-example-workflow.yml
vendored
@@ -7,7 +7,7 @@ on:
|
||||
inputs:
|
||||
nodes:
|
||||
default: "gaudi,xeon"
|
||||
description: "Hardware to run test gaudi,gaudi3,xeon,rocm,arc"
|
||||
description: "Hardware to run test"
|
||||
required: true
|
||||
type: string
|
||||
examples:
|
||||
@@ -20,6 +20,11 @@ on:
|
||||
description: "Tag to apply to images"
|
||||
required: true
|
||||
type: string
|
||||
deploy_gmc:
|
||||
default: false
|
||||
description: 'Whether to deploy gmc'
|
||||
required: true
|
||||
type: boolean
|
||||
build:
|
||||
default: true
|
||||
description: 'Build test required images for Examples'
|
||||
@@ -30,9 +35,14 @@ on:
|
||||
description: 'Test examples with docker compose'
|
||||
required: false
|
||||
type: boolean
|
||||
test_helmchart:
|
||||
default: true
|
||||
description: 'Test examples with helm charts'
|
||||
test_k8s:
|
||||
default: false
|
||||
description: 'Test examples with k8s'
|
||||
required: false
|
||||
type: boolean
|
||||
test_gmc:
|
||||
default: false
|
||||
description: 'Test examples with gmc'
|
||||
required: false
|
||||
type: boolean
|
||||
opea_branch:
|
||||
@@ -42,14 +52,9 @@ on:
|
||||
type: string
|
||||
inject_commit:
|
||||
default: false
|
||||
description: "inject commit to docker images"
|
||||
description: "inject commit to docker images true or false"
|
||||
required: false
|
||||
type: boolean
|
||||
use_model_cache:
|
||||
default: false
|
||||
description: "use model cache"
|
||||
required: false
|
||||
type: boolean
|
||||
type: string
|
||||
|
||||
permissions: read-all
|
||||
jobs:
|
||||
@@ -69,20 +74,23 @@ jobs:
|
||||
nodes_json=$(printf '%s\n' "${nodes[@]}" | sort -u | jq -R '.' | jq -sc '.')
|
||||
echo "nodes=$nodes_json" >> $GITHUB_OUTPUT
|
||||
|
||||
build-comps-base:
|
||||
build-deploy-gmc:
|
||||
needs: [get-test-matrix]
|
||||
if: ${{ fromJSON(inputs.deploy_gmc) }}
|
||||
strategy:
|
||||
matrix:
|
||||
node: ${{ fromJson(needs.get-test-matrix.outputs.nodes) }}
|
||||
uses: ./.github/workflows/_build_comps_base_image.yml
|
||||
fail-fast: false
|
||||
uses: ./.github/workflows/_gmc-workflow.yml
|
||||
with:
|
||||
node: ${{ matrix.node }}
|
||||
build: ${{ fromJSON(inputs.build) }}
|
||||
tag: ${{ inputs.tag }}
|
||||
opea_branch: ${{ inputs.opea_branch }}
|
||||
secrets: inherit
|
||||
|
||||
run-examples:
|
||||
needs: [get-test-matrix, build-comps-base]
|
||||
needs: [get-test-matrix, build-deploy-gmc]
|
||||
if: always()
|
||||
strategy:
|
||||
matrix:
|
||||
example: ${{ fromJson(needs.get-test-matrix.outputs.examples) }}
|
||||
@@ -95,8 +103,8 @@ jobs:
|
||||
tag: ${{ inputs.tag }}
|
||||
build: ${{ fromJSON(inputs.build) }}
|
||||
test_compose: ${{ fromJSON(inputs.test_compose) }}
|
||||
test_helmchart: ${{ fromJSON(inputs.test_helmchart) }}
|
||||
test_k8s: ${{ fromJSON(inputs.test_k8s) }}
|
||||
test_gmc: ${{ fromJSON(inputs.test_gmc) }}
|
||||
opea_branch: ${{ inputs.opea_branch }}
|
||||
inject_commit: ${{ inputs.inject_commit }}
|
||||
use_model_cache: ${{ inputs.use_model_cache }}
|
||||
secrets: inherit
|
||||
|
||||
6
.github/workflows/manual-freeze-tag.yml
vendored
6
.github/workflows/manual-freeze-tag.yml
vendored
@@ -25,9 +25,9 @@ jobs:
|
||||
|
||||
- name: Set up Git
|
||||
run: |
|
||||
git config --global user.name "CICD-at-OPEA"
|
||||
git config --global user.email "CICD@opea.dev"
|
||||
git remote set-url origin https://CICD-at-OPEA:"${{ secrets.ACTION_TOKEN }}"@github.com/opea-project/GenAIExamples.git
|
||||
git config --global user.name "NeuralChatBot"
|
||||
git config --global user.email "grp_neural_chat_bot@intel.com"
|
||||
git remote set-url origin https://NeuralChatBot:"${{ secrets.ACTION_TOKEN }}"@github.com/opea-project/GenAIExamples.git
|
||||
|
||||
- name: Run script
|
||||
run: |
|
||||
|
||||
5
.github/workflows/manual-image-build.yml
vendored
5
.github/workflows/manual-image-build.yml
vendored
@@ -32,9 +32,9 @@ on:
|
||||
type: string
|
||||
inject_commit:
|
||||
default: false
|
||||
description: "inject commit to docker images"
|
||||
description: "inject commit to docker images true or false"
|
||||
required: false
|
||||
type: boolean
|
||||
type: string
|
||||
|
||||
jobs:
|
||||
get-test-matrix:
|
||||
@@ -51,7 +51,6 @@ jobs:
|
||||
|
||||
image-build:
|
||||
needs: get-test-matrix
|
||||
if: ${{ needs.get-test-matrix.outputs.nodes != '' }}
|
||||
strategy:
|
||||
matrix:
|
||||
node: ${{ fromJson(needs.get-test-matrix.outputs.nodes) }}
|
||||
|
||||
@@ -33,7 +33,6 @@ jobs:
|
||||
|
||||
clean-up:
|
||||
needs: get-build-matrix
|
||||
if: ${{ needs.get-build-matrix.outputs.nodes != '' }}
|
||||
strategy:
|
||||
matrix:
|
||||
node: ${{ fromJson(needs.get-build-matrix.outputs.nodes) }}
|
||||
@@ -48,7 +47,6 @@ jobs:
|
||||
|
||||
build:
|
||||
needs: [get-build-matrix, clean-up]
|
||||
if: ${{ needs.get-build-matrix.outputs.examples != '' }}
|
||||
strategy:
|
||||
matrix:
|
||||
example: ${{ fromJson(needs.get-build-matrix.outputs.examples) }}
|
||||
|
||||
@@ -32,15 +32,8 @@ jobs:
|
||||
echo "TAG=$TAG" >> $GITHUB_OUTPUT
|
||||
echo "PUBLISH_TAGS=$PUBLISH_TAGS" >> $GITHUB_OUTPUT
|
||||
|
||||
build-comps-base:
|
||||
needs: [get-build-matrix]
|
||||
uses: ./.github/workflows/_build_comps_base_image.yml
|
||||
with:
|
||||
node: gaudi
|
||||
|
||||
build-and-test:
|
||||
needs: get-build-matrix
|
||||
if: ${{ needs.get-build-matrix.outputs.examples_json != '' }}
|
||||
strategy:
|
||||
matrix:
|
||||
example: ${{ fromJSON(needs.get-build-matrix.outputs.examples_json) }}
|
||||
@@ -50,7 +43,6 @@ jobs:
|
||||
node: gaudi
|
||||
example: ${{ matrix.example }}
|
||||
test_compose: true
|
||||
inject_commit: true
|
||||
secrets: inherit
|
||||
|
||||
get-image-list:
|
||||
@@ -61,11 +53,9 @@ jobs:
|
||||
|
||||
publish:
|
||||
needs: [get-build-matrix, get-image-list, build-and-test]
|
||||
if: ${{ always() && needs.get-image-list.outputs.matrix != '' }}
|
||||
strategy:
|
||||
matrix:
|
||||
image: ${{ fromJSON(needs.get-image-list.outputs.matrix) }}
|
||||
fail-fast: false
|
||||
runs-on: "docker-build-gaudi"
|
||||
steps:
|
||||
- uses: docker/login-action@v3.2.0
|
||||
|
||||
76
.github/workflows/pr-chart-e2e.yml
vendored
76
.github/workflows/pr-chart-e2e.yml
vendored
@@ -1,76 +0,0 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
name: E2E Test with Helm Charts
|
||||
|
||||
on:
|
||||
pull_request_target:
|
||||
branches: [main]
|
||||
types: [opened, reopened, ready_for_review, synchronize] # added `ready_for_review` since draft is skipped
|
||||
paths:
|
||||
- "!**.md"
|
||||
- "**/helm/**"
|
||||
workflow_dispatch:
|
||||
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
|
||||
cancel-in-progress: true
|
||||
|
||||
jobs:
|
||||
job1:
|
||||
name: Get-Test-Matrix
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
run_matrix: ${{ steps.get-test-matrix.outputs.run_matrix }}
|
||||
steps:
|
||||
- name: Checkout Repo
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
ref: "refs/pull/${{ github.event.number }}/merge"
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Get Test Matrix
|
||||
id: get-test-matrix
|
||||
run: |
|
||||
set -x
|
||||
echo "base_commit=${{ github.event.pull_request.base.sha }}"
|
||||
base_commit=${{ github.event.pull_request.base.sha }}
|
||||
merged_commit=$(git log -1 --format='%H')
|
||||
values_files=$(git diff --name-only ${base_commit} ${merged_commit} | \
|
||||
grep "values.yaml" | \
|
||||
sort -u ) #CodeGen/kubernetes/helm/cpu-values.yaml
|
||||
run_matrix="{\"include\":["
|
||||
for values_file in ${values_files}; do
|
||||
if [ -f "$values_file" ]; then
|
||||
valuefile=$(basename "$values_file") # cpu-values.yaml
|
||||
example=$(echo "$values_file" | cut -d'/' -f1) # CodeGen
|
||||
if [[ "$valuefile" == *"gaudi"* ]]; then
|
||||
hardware="gaudi"
|
||||
elif [[ "$valuefile" == *"nv"* ]]; then
|
||||
continue
|
||||
else
|
||||
hardware="xeon"
|
||||
fi
|
||||
echo "example=${example}, hardware=${hardware}, valuefile=${valuefile}"
|
||||
if [[ $(echo ${run_matrix} | grep -c "{\"example\":\"${example}\",\"hardware\":\"${hardware}\"},") == 0 ]]; then
|
||||
run_matrix="${run_matrix}{\"example\":\"${example}\",\"hardware\":\"${hardware}\"},"
|
||||
echo "------------------ add one values file ------------------"
|
||||
fi
|
||||
fi
|
||||
done
|
||||
run_matrix="${run_matrix%,}"
|
||||
run_matrix=$run_matrix"]}"
|
||||
echo "run_matrix="${run_matrix}""
|
||||
echo "run_matrix="${run_matrix}"" >> $GITHUB_OUTPUT
|
||||
|
||||
helm-chart-test:
|
||||
needs: [job1]
|
||||
if: always() && ${{ fromJSON(needs.job1.outputs.run_matrix).length != 0 }}
|
||||
uses: ./.github/workflows/_helm-e2e.yml
|
||||
strategy:
|
||||
matrix: ${{ fromJSON(needs.job1.outputs.run_matrix) }}
|
||||
with:
|
||||
example: ${{ matrix.example }}
|
||||
hardware: ${{ matrix.hardware }}
|
||||
mode: "CI"
|
||||
secrets: inherit
|
||||
11
.github/workflows/pr-docker-compose-e2e.yml
vendored
11
.github/workflows/pr-docker-compose-e2e.yml
vendored
@@ -4,8 +4,8 @@
|
||||
name: E2E test with docker compose
|
||||
|
||||
on:
|
||||
pull_request_target:
|
||||
branches: ["main", "*rc"]
|
||||
pull_request:
|
||||
branches: ["main", "*rc", "genaicomps_refactor"]
|
||||
types: [opened, reopened, ready_for_review, synchronize] # added `ready_for_review` since draft is skipped
|
||||
paths:
|
||||
- "**/Dockerfile**"
|
||||
@@ -28,20 +28,19 @@ jobs:
|
||||
if: ${{ !github.event.pull_request.draft }}
|
||||
uses: ./.github/workflows/_get-test-matrix.yml
|
||||
with:
|
||||
diff_excluded_files: '\.github|\.md|\.txt|kubernetes|gmc|assets|benchmark'
|
||||
diff_excluded_files: '.github|*.md|*.txt|kubernetes|manifest|gmc|assets|benchmark'
|
||||
|
||||
example-test:
|
||||
needs: [get-test-matrix]
|
||||
if: ${{ needs.get-test-matrix.outputs.run_matrix != '' }}
|
||||
strategy:
|
||||
matrix: ${{ fromJSON(needs.get-test-matrix.outputs.run_matrix) }}
|
||||
fail-fast: false
|
||||
if: ${{ !github.event.pull_request.draft }}
|
||||
uses: ./.github/workflows/_run-docker-compose.yml
|
||||
with:
|
||||
registry: "opea"
|
||||
tag: "ci"
|
||||
example: ${{ matrix.example }}
|
||||
hardware: ${{ matrix.hardware }}
|
||||
use_model_cache: true
|
||||
diff_excluded_files: '\.github|\.md|\.txt|kubernetes|gmc|assets|benchmark'
|
||||
diff_excluded_files: '.github|*.md|*.txt|kubernetes|manifest|gmc|assets|benchmark'
|
||||
secrets: inherit
|
||||
|
||||
@@ -21,7 +21,8 @@ jobs:
|
||||
- name: Clone Repo GenAIComps
|
||||
run: |
|
||||
cd ..
|
||||
git clone --depth 1 https://github.com/opea-project/GenAIComps.git
|
||||
git clone https://github.com/opea-project/GenAIComps.git
|
||||
git checkout refactor_comps
|
||||
|
||||
- name: Check for Missing Dockerfile Paths in GenAIComps
|
||||
run: |
|
||||
@@ -60,7 +61,7 @@ jobs:
|
||||
shopt -s globstar
|
||||
no_add="FALSE"
|
||||
cd ${{github.workspace}}
|
||||
Dockerfiles=$(realpath $(find ./ -name '*Dockerfile*' ! -path '*/tests/*'))
|
||||
Dockerfiles=$(realpath $(find ./ -name '*Dockerfile*'))
|
||||
if [ -n "$Dockerfiles" ]; then
|
||||
for dockerfile in $Dockerfiles; do
|
||||
service=$(echo "$dockerfile" | awk -F '/GenAIExamples/' '{print $2}' | awk -F '/' '{print $2}')
|
||||
|
||||
@@ -8,10 +8,11 @@ on:
|
||||
branches: ["main", "*rc"]
|
||||
types: [opened, reopened, ready_for_review, synchronize] # added `ready_for_review` since draft is skipped
|
||||
paths:
|
||||
- "**/kubernetes/gmc/**"
|
||||
- "**/kubernetes/**/gmc/**"
|
||||
- "**/tests/test_gmc**"
|
||||
- "!**.md"
|
||||
- "!**.txt"
|
||||
- "!**/kubernetes/**/manifest/**"
|
||||
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
|
||||
@@ -21,7 +22,7 @@ jobs:
|
||||
job1:
|
||||
uses: ./.github/workflows/_get-test-matrix.yml
|
||||
with:
|
||||
diff_excluded_files: '\.github|docker_compose|assets|\.md|\.txt'
|
||||
diff_excluded_files: '.github|docker_compose|manifest|assets|*.md|*.txt'
|
||||
test_mode: "gmc"
|
||||
|
||||
gmc-test:
|
||||
8
.github/workflows/pr-link-path-scan.yml
vendored
8
.github/workflows/pr-link-path-scan.yml
vendored
@@ -76,7 +76,13 @@ jobs:
|
||||
cd ${{github.workspace}}
|
||||
fail="FALSE"
|
||||
repo_name=${{ github.event.pull_request.head.repo.full_name }}
|
||||
branch="https://github.com/$repo_name/blob/${{ github.event.pull_request.head.ref }}"
|
||||
if [ "$(echo "$repo_name"|cut -d'/' -f1)" != "opea-project" ]; then
|
||||
owner=$(echo "${{ github.event.pull_request.head.repo.full_name }}" |cut -d'/' -f1)
|
||||
branch="https://github.com/$owner/GenAIExamples/tree/${{ github.event.pull_request.head.ref }}"
|
||||
else
|
||||
branch="https://github.com/opea-project/GenAIExamples/blob/${{ github.event.pull_request.head.ref }}"
|
||||
fi
|
||||
link_head="https://github.com/opea-project/GenAIExamples/blob/main"
|
||||
|
||||
merged_commit=$(git log -1 --format='%H')
|
||||
changed_files="$(git diff --name-status --diff-filter=ARM ${{ github.event.pull_request.base.sha }} ${merged_commit} | awk '/\.md$/ {print $NF}')"
|
||||
|
||||
42
.github/workflows/pr-manifest-e2e.yml
vendored
Normal file
42
.github/workflows/pr-manifest-e2e.yml
vendored
Normal file
@@ -0,0 +1,42 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
name: E2E test with manifests
|
||||
|
||||
on:
|
||||
pull_request_target:
|
||||
branches: ["main", "*rc"]
|
||||
types: [opened, reopened, ready_for_review, synchronize] # added `ready_for_review` since draft is skipped
|
||||
paths:
|
||||
- "**/Dockerfile**"
|
||||
- "**.py"
|
||||
- "**/kubernetes/**/manifest/**"
|
||||
- "**/tests/test_manifest**"
|
||||
- "!**.md"
|
||||
- "!**.txt"
|
||||
- "!**/kubernetes/**/gmc/**"
|
||||
workflow_dispatch:
|
||||
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
|
||||
cancel-in-progress: true
|
||||
|
||||
jobs:
|
||||
job1:
|
||||
uses: ./.github/workflows/_get-test-matrix.yml
|
||||
with:
|
||||
diff_excluded_files: '.github|docker_compose|gmc|assets|*.md|*.txt|benchmark'
|
||||
test_mode: "manifest"
|
||||
|
||||
run-example:
|
||||
needs: job1
|
||||
strategy:
|
||||
matrix: ${{ fromJSON(needs.job1.outputs.run_matrix) }}
|
||||
fail-fast: false
|
||||
uses: ./.github/workflows/_example-workflow.yml
|
||||
with:
|
||||
node: ${{ matrix.hardware }}
|
||||
example: ${{ matrix.example }}
|
||||
tag: ${{ github.event.pull_request.head.sha }}
|
||||
test_k8s: true
|
||||
secrets: inherit
|
||||
1
.github/workflows/push-image-build.yml
vendored
1
.github/workflows/push-image-build.yml
vendored
@@ -24,7 +24,6 @@ jobs:
|
||||
|
||||
image-build:
|
||||
needs: job1
|
||||
if: ${{ needs.job1.outputs.run_matrix != '{"include":[]}' }}
|
||||
strategy:
|
||||
matrix: ${{ fromJSON(needs.job1.outputs.run_matrix) }}
|
||||
fail-fast: false
|
||||
|
||||
@@ -40,7 +40,7 @@ jobs:
|
||||
- name: Create Issue
|
||||
uses: daisy-ycguo/create-issue-action@stable
|
||||
with:
|
||||
token: ${{ secrets.ACTION_TOKEN }}
|
||||
token: ${{ secrets.Infra_Issue_Token }}
|
||||
owner: opea-project
|
||||
repo: GenAIInfra
|
||||
title: |
|
||||
@@ -54,6 +54,6 @@ jobs:
|
||||
|
||||
${{ env.changed_files }}
|
||||
|
||||
Please verify if the helm charts need to be changed accordingly.
|
||||
Please verify if the helm charts and manifests need to be changed accordingly.
|
||||
|
||||
> This issue was created automatically by CI.
|
||||
|
||||
@@ -9,8 +9,6 @@ import sys
|
||||
import yaml
|
||||
|
||||
images = {}
|
||||
dockerfiles = {}
|
||||
errors = []
|
||||
|
||||
|
||||
def check_docker_compose_build_definition(file_path):
|
||||
@@ -32,26 +30,18 @@ def check_docker_compose_build_definition(file_path):
|
||||
if not os.path.isfile(dockerfile):
|
||||
# dockerfile not exists in the current repo context, assume it's in 3rd party context
|
||||
dockerfile = os.path.normpath(os.path.join(context, build.get("dockerfile", "")))
|
||||
item = {"file_path": file_path, "service": service, "dockerfile": dockerfile, "image": image}
|
||||
item = {"file_path": file_path, "service": service, "dockerfile": dockerfile}
|
||||
if image in images and dockerfile != images[image]["dockerfile"]:
|
||||
errors.append(
|
||||
f"ERROR: !!! Found Conflicts !!!\n"
|
||||
f"Image: {image}, Dockerfile: {dockerfile}, defined in Service: {service}, File: {file_path}\n"
|
||||
print("ERROR: !!! Found Conflicts !!!")
|
||||
print(f"Image: {image}, Dockerfile: {dockerfile}, defined in Service: {service}, File: {file_path}")
|
||||
print(
|
||||
f"Image: {image}, Dockerfile: {images[image]['dockerfile']}, defined in Service: {images[image]['service']}, File: {images[image]['file_path']}"
|
||||
)
|
||||
sys.exit(1)
|
||||
else:
|
||||
# print(f"Add Image: {image} Dockerfile: {dockerfile}")
|
||||
images[image] = item
|
||||
|
||||
if dockerfile in dockerfiles and image != dockerfiles[dockerfile]["image"]:
|
||||
errors.append(
|
||||
f"WARNING: Different images using the same Dockerfile\n"
|
||||
f"Dockerfile: {dockerfile}, Image: {image}, defined in Service: {service}, File: {file_path}\n"
|
||||
f"Dockerfile: {dockerfile}, Image: {dockerfiles[dockerfile]['image']}, defined in Service: {dockerfiles[dockerfile]['service']}, File: {dockerfiles[dockerfile]['file_path']}"
|
||||
)
|
||||
else:
|
||||
dockerfiles[dockerfile] = item
|
||||
|
||||
|
||||
def parse_arg():
|
||||
parser = argparse.ArgumentParser(
|
||||
@@ -66,12 +56,6 @@ def main():
|
||||
for file_path in args.files:
|
||||
check_docker_compose_build_definition(file_path)
|
||||
print("SUCCESS: No Conlicts Found.")
|
||||
if errors:
|
||||
for error in errors:
|
||||
print(error)
|
||||
sys.exit(1)
|
||||
else:
|
||||
print("SUCCESS: No Conflicts Found.")
|
||||
return 0
|
||||
|
||||
|
||||
|
||||
@@ -26,27 +26,14 @@ case "$1" in
|
||||
echo "Release all ports used by the services in $yaml_file ..."
|
||||
pip install jq yq
|
||||
ports=$(yq '.services[].ports[] | split(":")[0]' $yaml_file | grep -o '[0-9a-zA-Z_-]\+')
|
||||
echo "All ports list..."
|
||||
echo "$ports"
|
||||
for port in $ports; do
|
||||
if [[ $port =~ [a-zA-Z_-] ]]; then
|
||||
echo "Search port value $port from the test case..."
|
||||
port_fix=$(grep -E "export $port=" tests/$test_case | cut -d'=' -f2)
|
||||
if [[ "$port_fix" == "" ]]; then
|
||||
echo "Can't find the port value from the test case, use the default value in yaml..."
|
||||
port_fix=$(yq '.services[].ports[]' $yaml_file | grep $port | cut -d':' -f2 | grep -o '[0-9a-zA-Z]\+')
|
||||
fi
|
||||
port=$port_fix
|
||||
fi
|
||||
if [[ $port =~ [0-9] ]]; then
|
||||
if [[ $port == 5000 ]]; then
|
||||
echo "Error: Port 5000 is used by local docker registry, please DO NOT use it in docker compose deployment!!!"
|
||||
exit 1
|
||||
fi
|
||||
echo "Check port $port..."
|
||||
cid=$(docker ps --filter "publish=${port}" --format "{{.ID}}")
|
||||
if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && echo "release $port"; fi
|
||||
port=$(grep -E "export $port=" tests/$test_case | cut -d'=' -f2)
|
||||
fi
|
||||
echo $port
|
||||
cid=$(docker ps --filter "publish=${port}" --format "{{.ID}}")
|
||||
if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi
|
||||
done
|
||||
;;
|
||||
*)
|
||||
|
||||
12
.github/workflows/scripts/get_test_matrix.sh
vendored
12
.github/workflows/scripts/get_test_matrix.sh
vendored
@@ -12,25 +12,21 @@ run_matrix="{\"include\":["
|
||||
|
||||
examples=$(printf '%s\n' "${changed_files[@]}" | grep '/' | cut -d'/' -f1 | sort -u)
|
||||
for example in ${examples}; do
|
||||
if [[ ! -d $WORKSPACE/$example ]]; then continue; fi
|
||||
cd $WORKSPACE/$example
|
||||
if [[ ! $(find . -type f | grep ${test_mode}) ]]; then continue; fi
|
||||
cd tests
|
||||
ls -l
|
||||
if [[ "$test_mode" == "docker_image_build" ]]; then
|
||||
hardware_list="gaudi xeon"
|
||||
find_name="test_manifest_on_*.sh"
|
||||
else
|
||||
find_name="test_${test_mode}*_on_*.sh"
|
||||
hardware_list=$(find . -type f -name "${find_name}" | cut -d/ -f2 | cut -d. -f1 | awk -F'_on_' '{print $2}'| sort -u)
|
||||
fi
|
||||
hardware_list=$(find . -type f -name "${find_name}" | cut -d/ -f2 | cut -d. -f1 | awk -F'_on_' '{print $2}'| sort -u)
|
||||
echo -e "Test supported hardware list: \n${hardware_list}"
|
||||
|
||||
run_hardware=""
|
||||
if [[ $(printf '%s\n' "${changed_files[@]}" | grep ${example} | cut -d'/' -f2 | grep -E '\.py|Dockerfile*|ui|docker_image_build' ) ]]; then
|
||||
echo "run test on all hardware if megaservice or ui code change..."
|
||||
run_hardware=$hardware_list
|
||||
elif [[ $(printf '%s\n' "${changed_files[@]}" | grep ${example} | grep 'tests'| cut -d'/' -f3 | grep -vE '^test_|^_test' ) ]]; then
|
||||
echo "run test on all hardware if common test scripts change..."
|
||||
if [[ $(printf '%s\n' "${changed_files[@]}" | grep ${example} | cut -d'/' -f2 | grep -E '*.py|Dockerfile*|ui|docker_image_build' ) ]]; then
|
||||
# run test on all hardware if megaservice or ui code change
|
||||
run_hardware=$hardware_list
|
||||
else
|
||||
for hardware in ${hardware_list}; do
|
||||
|
||||
11
.github/workflows/scripts/k8s-utils.sh
vendored
11
.github/workflows/scripts/k8s-utils.sh
vendored
@@ -2,7 +2,7 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
set -e
|
||||
#set -xe
|
||||
|
||||
function dump_pod_log() {
|
||||
pod_name=$1
|
||||
@@ -12,7 +12,7 @@ function dump_pod_log() {
|
||||
kubectl describe pod $pod_name -n $namespace
|
||||
echo "-----------------------------------"
|
||||
echo "#kubectl logs $pod_name -n $namespace"
|
||||
kubectl logs $pod_name -n $namespace --all-containers --prefix=true
|
||||
kubectl logs $pod_name -n $namespace
|
||||
echo "-----------------------------------"
|
||||
}
|
||||
|
||||
@@ -44,13 +44,8 @@ function dump_pods_status() {
|
||||
|
||||
function dump_all_pod_logs() {
|
||||
namespace=$1
|
||||
echo "------SUMMARY of POD STATUS in NS $namespace------"
|
||||
kubectl get pods -n $namespace -o wide
|
||||
echo "------SUMMARY of SVC STATUS in NS $namespace------"
|
||||
kubectl get services -n $namespace -o wide
|
||||
echo "------SUMMARY of endpoint STATUS in NS $namespace------"
|
||||
kubectl get endpoints -n $namespace -o wide
|
||||
echo "-----DUMP POD STATUS AND LOG in NS $namespace------"
|
||||
|
||||
pods=$(kubectl get pods -n $namespace -o jsonpath='{.items[*].metadata.name}')
|
||||
for pod_name in $pods
|
||||
do
|
||||
|
||||
8
.github/workflows/weekly-update-images.yml
vendored
8
.github/workflows/weekly-update-images.yml
vendored
@@ -1,9 +1,11 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
name: Weekly update 3rd party images
|
||||
name: Weekly update base images and 3rd party images
|
||||
|
||||
on:
|
||||
schedule:
|
||||
- cron: "0 0 * * 0"
|
||||
workflow_dispatch:
|
||||
|
||||
permissions:
|
||||
@@ -14,8 +16,8 @@ jobs:
|
||||
freeze-images:
|
||||
runs-on: ubuntu-latest
|
||||
env:
|
||||
USER_NAME: "CICD-at-OPEA"
|
||||
USER_EMAIL: "CICD@opea.dev"
|
||||
USER_NAME: "NeuralChatBot"
|
||||
USER_EMAIL: "grp_neural_chat_bot@intel.com"
|
||||
BRANCH_NAME: "update_images_tag"
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
|
||||
@@ -7,7 +7,7 @@ ci:
|
||||
|
||||
repos:
|
||||
- repo: https://github.com/pre-commit/pre-commit-hooks
|
||||
rev: v5.0.0
|
||||
rev: v4.6.0
|
||||
hooks:
|
||||
- id: end-of-file-fixer
|
||||
files: (.*\.(py|md|rst|yaml|yml|json|ts|js|html|svelte|sh))$
|
||||
@@ -100,18 +100,18 @@ repos:
|
||||
- prettier@3.2.5
|
||||
|
||||
- repo: https://github.com/psf/black.git
|
||||
rev: 24.10.0
|
||||
rev: 24.4.2
|
||||
hooks:
|
||||
- id: black
|
||||
files: (.*\.py)$
|
||||
|
||||
- repo: https://github.com/asottile/blacken-docs
|
||||
rev: 1.19.1
|
||||
rev: 1.18.0
|
||||
hooks:
|
||||
- id: blacken-docs
|
||||
args: [--line-length=120, --skip-errors]
|
||||
additional_dependencies:
|
||||
- black==24.10.0
|
||||
- black==24.4.2
|
||||
|
||||
- repo: https://github.com/codespell-project/codespell
|
||||
rev: v2.3.0
|
||||
@@ -122,7 +122,7 @@ repos:
|
||||
- tomli
|
||||
|
||||
- repo: https://github.com/astral-sh/ruff-pre-commit
|
||||
rev: v0.8.6
|
||||
rev: v0.5.0
|
||||
hooks:
|
||||
- id: ruff
|
||||
args: [--fix, --exit-non-zero-on-fix, --no-cache]
|
||||
|
||||
@@ -1,17 +1,9 @@
|
||||
# Agents for Question Answering
|
||||
|
||||
## Table of contents
|
||||
|
||||
1. [Overview](#overview)
|
||||
2. [Deploy with Docker](#deploy-with-docker)
|
||||
3. [Launch the UI](#launch-the-ui)
|
||||
4. [Validate Services](#validate-services)
|
||||
5. [Register Tools](#how-to-register-other-tools-with-the-ai-agent)
|
||||
|
||||
## Overview
|
||||
|
||||
This example showcases a hierarchical multi-agent system for question-answering applications. The architecture diagram below shows a supervisor agent that interfaces with the user and dispatches tasks to two worker agents to gather information and come up with answers. The worker RAG agent uses the retrieval tool to retrieve relevant documents from a knowledge base - a vector database. The worker SQL agent retrieves relevant data from a SQL database. Although not included in this example by default, other tools such as a web search tool or a knowledge graph query tool can be used by the supervisor agent to gather information from additional sources.
|
||||

|
||||
This example showcases a hierarchical multi-agent system for question-answering applications. The architecture diagram is shown below. The supervisor agent interfaces with the user and dispatches tasks to the worker agent and other tools to gather information and come up with answers. The worker agent uses the retrieval tool to generate answers to the queries posted by the supervisor agent. Other tools used by the supervisor agent may include APIs to interface with knowledge graphs, SQL databases, external knowledge bases, etc.
|
||||

|
||||
|
||||
The AgentQnA example is implemented using the component-level microservices defined in [GenAIComps](https://github.com/opea-project/GenAIComps). The flow chart below shows the information flow between different microservices for this example.
|
||||
|
||||
@@ -46,7 +38,6 @@ flowchart LR
|
||||
end
|
||||
AG_REACT([Agent MicroService - react]):::blue
|
||||
AG_RAG([Agent MicroService - rag]):::blue
|
||||
AG_SQL([Agent MicroService - sql]):::blue
|
||||
LLM_gen{{LLM Service <br>}}
|
||||
DP([Data Preparation MicroService]):::blue
|
||||
TEI_RER{{Reranking service<br>}}
|
||||
@@ -60,7 +51,6 @@ flowchart LR
|
||||
direction LR
|
||||
a[User Input Query] --> AG_REACT
|
||||
AG_REACT --> AG_RAG
|
||||
AG_REACT --> AG_SQL
|
||||
AG_RAG --> DocIndexRetriever-MegaService
|
||||
EM ==> RET
|
||||
RET ==> RER
|
||||
@@ -69,7 +59,6 @@ flowchart LR
|
||||
%% Embedding service flow
|
||||
direction LR
|
||||
AG_RAG <-.-> LLM_gen
|
||||
AG_SQL <-.-> LLM_gen
|
||||
AG_REACT <-.-> LLM_gen
|
||||
EM <-.-> TEI_EM
|
||||
RET <-.-> R_RET
|
||||
@@ -83,169 +72,152 @@ flowchart LR
|
||||
|
||||
```
|
||||
|
||||
### Why should AI Agents be used for question-answering?
|
||||
### Why Agent for question answering?
|
||||
|
||||
1. **Improve relevancy of retrieved context.**
|
||||
RAG agents can rephrase user queries, decompose user queries, and iterate to get the most relevant context for answering a user's question. Compared to conventional RAG, RAG agents significantly improve the correctness and relevancy of the answer because of the iterations it goes through.
|
||||
2. **Expand scope of skills.**
|
||||
The supervisor agent interacts with multiple worker agents that specialize in different skills (e.g., retrieve documents, write SQL queries, etc.). Thus, it can answer questions with different methods.
|
||||
3. **Hierarchical multi-agents improve performance.**
|
||||
Expert worker agents, such as RAG agents and SQL agents, can provide high-quality output for different aspects of a complex query, and the supervisor agent can aggregate the information to provide a comprehensive answer. If only one agent is used and all tools are provided to this single agent, it can lead to large overhead or not use the best tool to provide accurate answers.
|
||||
1. Improve relevancy of retrieved context.
|
||||
An agent can rephrase user queries, decompose user queries, and iterate to get the most relevant context for answering the user's questions. Compared to conventional RAG, a RAG agent can significantly improve the correctness and relevancy of the answer.
|
||||
2. Use tools to get additional knowledge.
|
||||
For example, knowledge graphs and SQL databases can be exposed as APIs for Agents to gather knowledge that may be missing in the retrieval vector database.
|
||||
3. Hierarchical agent can further improve performance.
|
||||
Expert worker agents, such as retrieval agent, knowledge graph agent, SQL agent, etc., can provide high-quality output for different aspects of a complex query, and the supervisor agent can aggregate the information together to provide a comprehensive answer.
|
||||
|
||||
## Deploy with docker
|
||||
## Deployment with docker
|
||||
|
||||
### 1. Set up environment </br>
|
||||
1. Build agent docker image [Optional]
|
||||
|
||||
#### First, clone the `GenAIExamples` repo.
|
||||
> [!NOTE]
|
||||
> the step is optional. The docker images will be automatically pulled when running the docker compose commands. This step is only needed if pulling images failed.
|
||||
|
||||
First, clone the opea GenAIComps repo.
|
||||
|
||||
```
|
||||
export WORKDIR=<your-work-directory>
|
||||
cd $WORKDIR
|
||||
git clone https://github.com/opea-project/GenAIExamples.git
|
||||
git clone https://github.com/opea-project/GenAIComps.git
|
||||
```
|
||||
|
||||
#### Second, set up environment variables.
|
||||
|
||||
##### For proxy environments only
|
||||
Then build the agent docker image. Both the supervisor agent and the worker agent will use the same docker image, but when we launch the two agents we will specify different strategies and register different tools.
|
||||
|
||||
```
|
||||
export http_proxy="Your_HTTP_Proxy"
|
||||
export https_proxy="Your_HTTPs_Proxy"
|
||||
# Example: no_proxy="localhost, 127.0.0.1, 192.168.1.1"
|
||||
export no_proxy="Your_No_Proxy"
|
||||
cd GenAIComps
|
||||
docker build -t opea/agent-langchain:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/agent/langchain/Dockerfile .
|
||||
```
|
||||
|
||||
##### For using open-source llms
|
||||
2. Set up environment for this example </br>
|
||||
|
||||
First, clone this repo.
|
||||
|
||||
```
|
||||
export WORKDIR=<your-work-directory>
|
||||
cd $WORKDIR
|
||||
git clone https://github.com/opea-project/GenAIExamples.git
|
||||
```
|
||||
|
||||
Second, set up env vars.
|
||||
|
||||
```
|
||||
# Example: host_ip="192.168.1.1" or export host_ip="External_Public_IP"
|
||||
export host_ip=$(hostname -I | awk '{print $1}')
|
||||
# if you are in a proxy environment, also set the proxy-related environment variables
|
||||
export http_proxy="Your_HTTP_Proxy"
|
||||
export https_proxy="Your_HTTPs_Proxy"
|
||||
# Example: no_proxy="localhost, 127.0.0.1, 192.168.1.1"
|
||||
export no_proxy="Your_No_Proxy"
|
||||
|
||||
export TOOLSET_PATH=$WORKDIR/GenAIExamples/AgentQnA/tools/
|
||||
# for using open-source llms
|
||||
export HUGGINGFACEHUB_API_TOKEN=<your-HF-token>
|
||||
export HF_CACHE_DIR=<directory-where-llms-are-downloaded> #so that no need to redownload every time
|
||||
|
||||
# optional: OPENAI_API_KEY if you want to use OpenAI models
|
||||
export OPENAI_API_KEY=<your-openai-key>
|
||||
```
|
||||
|
||||
3. Deploy the retrieval tool (i.e., DocIndexRetriever mega-service)
|
||||
|
||||
First, launch the mega-service.
|
||||
|
||||
```
|
||||
cd $WORKDIR/GenAIExamples/AgentQnA/retrieval_tool
|
||||
bash launch_retrieval_tool.sh
|
||||
```
|
||||
|
||||
Then, ingest data into the vector database. Here we provide an example. You can ingest your own data.
|
||||
|
||||
```
|
||||
bash run_ingest_data.sh
|
||||
```
|
||||
|
||||
4. Launch other tools. </br>
|
||||
In this example, we will use some of the mock APIs provided in the Meta CRAG KDD Challenge to demonstrate the benefits of gaining additional context from mock knowledge graphs.
|
||||
|
||||
```
|
||||
docker run -d -p=8080:8000 docker.io/aicrowd/kdd-cup-24-crag-mock-api:v0
|
||||
```
|
||||
|
||||
5. Launch agent services</br>
|
||||
We provide two options for `llm_engine` of the agents: 1. open-source LLMs, 2. OpenAI models via API calls.
|
||||
|
||||
Deploy it on Gaudi or Xeon respectively
|
||||
|
||||
::::{tab-set}
|
||||
:::{tab-item} Gaudi
|
||||
:sync: Gaudi
|
||||
|
||||
To use open-source LLMs on Gaudi2, run commands below.
|
||||
|
||||
```
|
||||
cd $WORKDIR/GenAIExamples/AgentQnA/docker_compose/intel/hpu/gaudi
|
||||
bash launch_tgi_gaudi.sh
|
||||
bash launch_agent_service_tgi_gaudi.sh
|
||||
```
|
||||
|
||||
:::
|
||||
:::{tab-item} Xeon
|
||||
:sync: Xeon
|
||||
|
||||
To use OpenAI models, run commands below.
|
||||
|
||||
```
|
||||
cd $WORKDIR/GenAIExamples/AgentQnA/docker_compose/intel/cpu/xeon
|
||||
bash launch_agent_service_openai.sh
|
||||
```
|
||||
|
||||
:::
|
||||
::::
|
||||
|
||||
## Validate services
|
||||
|
||||
First look at logs of the agent docker containers:
|
||||
|
||||
```
|
||||
export HUGGINGFACEHUB_API_TOKEN=<your-HF-token>
|
||||
export HF_CACHE_DIR=<directory-where-llms-are-downloaded> #so that no need to redownload every time
|
||||
```
|
||||
|
||||
##### [Optional] OPENAI_API_KEY to use OpenAI models
|
||||
|
||||
```
|
||||
export OPENAI_API_KEY=<your-openai-key>
|
||||
```
|
||||
|
||||
#### Third, set up environment variables for the selected hardware using the corresponding `set_env.sh`
|
||||
|
||||
##### Gaudi
|
||||
|
||||
```
|
||||
source $WORKDIR/GenAIExamples/AgentQnA/docker_compose/intel/hpu/gaudi/set_env.sh
|
||||
```
|
||||
|
||||
##### Xeon
|
||||
|
||||
```
|
||||
source $WORKDIR/GenAIExamples/AgentQnA/docker_compose/intel/cpu/xeon/set_env.sh
|
||||
```
|
||||
|
||||
### 2. Launch the multi-agent system. </br>
|
||||
|
||||
Two options are provided for the `llm_engine` of the agents: 1. open-source LLMs on Gaudi, 2. OpenAI models via API calls.
|
||||
|
||||
#### Gaudi
|
||||
|
||||
On Gaudi, `meta-llama/Meta-Llama-3.1-70B-Instruct` will be served using vllm.
|
||||
By default, both the RAG agent and SQL agent will be launched to support the React Agent.
|
||||
The React Agent requires the DocIndexRetriever's [`compose.yaml`](../DocIndexRetriever/docker_compose/intel/cpu/xeon/compose.yaml) file, so two `compose.yaml` files need to be run with docker compose to start the multi-agent system.
|
||||
|
||||
> **Note**: To enable the web search tool, skip this step and proceed to the "[Optional] Web Search Tool Support" section.
|
||||
|
||||
```bash
|
||||
cd $WORKDIR/GenAIExamples/AgentQnA/docker_compose/intel/hpu/gaudi/
|
||||
docker compose -f $WORKDIR/GenAIExamples/DocIndexRetriever/docker_compose/intel/cpu/xeon/compose.yaml -f compose.yaml up -d
|
||||
```
|
||||
|
||||
To enable Open Telemetry Tracing, the `compose.telemetry.yaml` file needs to be merged along with the default `compose.yaml` file.
|
||||
Gaudi example with Open Telemetry feature:
|
||||
|
||||
```bash
|
||||
cd $WORKDIR/GenAIExamples/AgentQnA/docker_compose/intel/hpu/gaudi/
|
||||
docker compose -f $WORKDIR/GenAIExamples/DocIndexRetriever/docker_compose/intel/cpu/xeon/compose.yaml -f compose.yaml -f compose.telemetry.yaml up -d
|
||||
```
|
||||
|
||||
##### [Optional] Web Search Tool Support
|
||||
|
||||
<details>
|
||||
<summary> Instructions </summary>
|
||||
A web search tool is supported in this example and can be enabled by running docker compose with the `compose.webtool.yaml` file.
|
||||
The Google Search API is used. Follow the [instructions](https://python.langchain.com/docs/integrations/tools/google_search) to create an API key and enable the Custom Search API on a Google account. The environment variables `GOOGLE_CSE_ID` and `GOOGLE_API_KEY` need to be set.
|
||||
|
||||
```bash
|
||||
cd $WORKDIR/GenAIExamples/AgentQnA/docker_compose/intel/hpu/gaudi/
|
||||
export GOOGLE_CSE_ID="YOUR_ID"
|
||||
export GOOGLE_API_KEY="YOUR_API_KEY"
|
||||
docker compose -f $WORKDIR/GenAIExamples/DocIndexRetriever/docker_compose/intel/cpu/xeon/compose.yaml -f compose.yaml -f compose.webtool.yaml up -d
|
||||
```
|
||||
|
||||
</details>
|
||||
|
||||
#### Xeon
|
||||
|
||||
On Xeon, only OpenAI models are supported.
|
||||
By default, both the RAG Agent and SQL Agent will be launched to support the React Agent.
|
||||
The React Agent requires the DocIndexRetriever's [`compose.yaml`](../DocIndexRetriever/docker_compose/intel/cpu/xeon/compose.yaml) file, so two `compose yaml` files need to be run with docker compose to start the multi-agent system.
|
||||
|
||||
```bash
|
||||
export OPENAI_API_KEY=<your-openai-key>
|
||||
cd $WORKDIR/GenAIExamples/AgentQnA/docker_compose/intel/cpu/xeon
|
||||
docker compose -f $WORKDIR/GenAIExamples/DocIndexRetriever/docker_compose/intel/cpu/xeon/compose.yaml -f compose_openai.yaml up -d
|
||||
```
|
||||
|
||||
### 3. Ingest Data into the vector database
|
||||
|
||||
The `run_ingest_data.sh` script will use an example jsonl file to ingest example documents into a vector database. Other ways to ingest data and other types of documents supported can be found in the OPEA dataprep microservice located in the opea-project/GenAIComps repo.
|
||||
|
||||
```bash
|
||||
cd $WORKDIR/GenAIExamples/AgentQnA/retrieval_tool/
|
||||
bash run_ingest_data.sh
|
||||
```
|
||||
|
||||
> **Note**: This is a one-time operation.
|
||||
|
||||
## Launch the UI
|
||||
|
||||
Open a web browser to http://localhost:5173 to access the UI. Ensure the environment variable `AGENT_URL` is set to http://$ip_address:9090/v1/chat/completions in [ui/svelte/.env](./ui/svelte/.env) or else the UI may not work properly.
|
||||
|
||||
The AgentQnA UI can be deployed locally or using Docker. To customize deployment, refer to the [AgentQnA UI Guide](./ui/svelte/README.md).
|
||||
|
||||
## [Optional] Deploy using Helm Charts
|
||||
|
||||
Refer to the [AgentQnA helm chart](./kubernetes/helm/README.md) for instructions on deploying AgentQnA on Kubernetes.
|
||||
|
||||
## Validate Services
|
||||
|
||||
1. First look at logs for each of the agent docker containers:
|
||||
|
||||
```bash
|
||||
# worker RAG agent
|
||||
# worker agent
|
||||
docker logs rag-agent-endpoint
|
||||
```
|
||||
|
||||
# worker SQL agent
|
||||
docker logs sql-agent-endpoint
|
||||
|
||||
```
|
||||
# supervisor agent
|
||||
docker logs react-agent-endpoint
|
||||
```
|
||||
|
||||
Look for the message "HTTP server setup successful" to confirm the agent docker container has started successfully.
|
||||
You should see something like "HTTP server setup successful" if the docker containers are started successfully.
|
||||
|
||||
2. Use python to validate each agent is working properly:
|
||||
Second, validate worker agent:
|
||||
|
||||
```bash
|
||||
# RAG worker agent
|
||||
python $WORKDIR/GenAIExamples/AgentQnA/tests/test.py --prompt "Tell me about Michael Jackson song Thriller" --agent_role "worker" --ext_port 9095
|
||||
|
||||
# SQL agent
|
||||
python $WORKDIR/GenAIExamples/AgentQnA/tests/test.py --prompt "How many employees in company" --agent_role "worker" --ext_port 9096
|
||||
|
||||
# supervisor agent: this will test a two-turn conversation
|
||||
python $WORKDIR/GenAIExamples/AgentQnA/tests/test.py --agent_role "supervisor" --ext_port 9090
|
||||
```
|
||||
curl http://${host_ip}:9095/v1/chat/completions -X POST -H "Content-Type: application/json" -d '{
|
||||
"query": "Most recent album by Taylor Swift"
|
||||
}'
|
||||
```
|
||||
|
||||
## How to register other tools with the AI agent
|
||||
Third, validate supervisor agent:
|
||||
|
||||
The [tools](./tools) folder contains YAML and Python files for additional tools for the supervisor and worker agents. Refer to the "Provide your own tools" section in the instructions [here](https://github.com/opea-project/GenAIComps/tree/main/comps/agent/src/README.md) to add tools and customize the AI agents.
|
||||
```
|
||||
curl http://${host_ip}:9090/v1/chat/completions -X POST -H "Content-Type: application/json" -d '{
|
||||
"query": "Most recent album by Taylor Swift"
|
||||
}'
|
||||
```
|
||||
|
||||
## How to register your own tools with agent
|
||||
|
||||
You can take a look at the tools yaml and python files in this example. For more details, please refer to the "Provide your own tools" section in the instructions [here](https://github.com/opea-project/GenAIComps/tree/main/comps/agent/langchain/README.md).
|
||||
|
||||
BIN
AgentQnA/assets/agent_qna_arch.png
Normal file
BIN
AgentQnA/assets/agent_qna_arch.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 69 KiB |
Binary file not shown.
|
Before Width: | Height: | Size: 207 KiB |
Binary file not shown.
|
Before Width: | Height: | Size: 56 KiB |
Binary file not shown.
|
Before Width: | Height: | Size: 57 KiB |
@@ -1,342 +0,0 @@
|
||||
# Build Mega Service of AgentQnA on AMD ROCm GPU
|
||||
|
||||
## Build Docker Images
|
||||
|
||||
### 1. Build Docker Image
|
||||
|
||||
- #### Create application install directory and go to it:
|
||||
|
||||
```bash
|
||||
mkdir ~/agentqna-install && cd ~/agentqna-install
|
||||
```
|
||||
|
||||
- #### Clone the repository GenAIExamples (the default repository branch "main" is used here):
|
||||
|
||||
```bash
|
||||
git clone https://github.com/opea-project/GenAIExamples.git
|
||||
```
|
||||
|
||||
If you need to use a specific branch/tag of the GenAIExamples repository, run the following instead (replace v1.3 with the branch or tag you need):
|
||||
|
||||
```bash
|
||||
git clone https://github.com/opea-project/GenAIExamples.git && cd GenAIExamples && git checkout v1.3
|
||||
```
|
||||
|
||||
Note that when you use a specific version of the code, you should follow the README from that same version.
|
||||
|
||||
- #### Go to build directory:
|
||||
|
||||
```bash
|
||||
cd ~/agentqna-install/GenAIExamples/AgentQnA/docker_image_build
|
||||
```
|
||||
|
||||
- Cleaning up the GenAIComps repository if it was previously cloned in this directory.
|
||||
This is necessary if the build was performed earlier and the GenAIComps folder exists and is not empty:
|
||||
|
||||
```bash
|
||||
echo Y | rm -R GenAIComps
|
||||
```
|
||||
|
||||
- #### Clone the repository GenAIComps (the default repository branch "main" is used here):
|
||||
|
||||
```bash
|
||||
git clone https://github.com/opea-project/GenAIComps.git
|
||||
```
|
||||
|
||||
Note that when you use a specific version of the code, you should follow the README from that same version.
|
||||
|
||||
- #### Set the list of images to build (from the build.yaml file)
|
||||
|
||||
Depending on whether you deploy a vLLM-based or a TGI-based application, set the list of services to build as follows:
|
||||
|
||||
#### vLLM-based application
|
||||
|
||||
```bash
|
||||
service_list="vllm-rocm agent agent-ui"
|
||||
```
|
||||
|
||||
#### TGI-based application
|
||||
|
||||
```bash
|
||||
service_list="agent agent-ui"
|
||||
```
|
||||
|
||||
- #### Optional. Pull TGI Docker Image (Do this if you want to use TGI)
|
||||
|
||||
```bash
|
||||
docker pull ghcr.io/huggingface/text-generation-inference:2.3.1-rocm
|
||||
```
|
||||
|
||||
- #### Build Docker Images
|
||||
|
||||
```bash
|
||||
docker compose -f build.yaml build ${service_list} --no-cache
|
||||
```
|
||||
|
||||
- #### Build DocIndexRetriever Docker Images
|
||||
|
||||
```bash
|
||||
cd ~/agentqna-install/GenAIExamples/DocIndexRetriever/docker_image_build/
|
||||
git clone https://github.com/opea-project/GenAIComps.git
|
||||
service_list="doc-index-retriever dataprep embedding retriever reranking"
|
||||
docker compose -f build.yaml build ${service_list} --no-cache
|
||||
```
|
||||
|
||||
- #### Pull DocIndexRetriever Docker Images
|
||||
|
||||
```bash
|
||||
docker pull redis/redis-stack:7.2.0-v9
|
||||
docker pull ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
|
||||
```
|
||||
|
||||
After the build, we check the list of images with the command:
|
||||
|
||||
```bash
|
||||
docker image ls
|
||||
```
|
||||
|
||||
The list of images should include:
|
||||
|
||||
##### vLLM-based application:
|
||||
|
||||
- opea/vllm-rocm:latest
|
||||
- opea/agent:latest
|
||||
- redis/redis-stack:7.2.0-v9
|
||||
- ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
|
||||
- opea/embedding:latest
|
||||
- opea/retriever:latest
|
||||
- opea/reranking:latest
|
||||
- opea/doc-index-retriever:latest
|
||||
|
||||
##### TGI-based application:
|
||||
|
||||
- ghcr.io/huggingface/text-generation-inference:2.3.1-rocm
|
||||
- opea/agent:latest
|
||||
- redis/redis-stack:7.2.0-v9
|
||||
- ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
|
||||
- opea/embedding:latest
|
||||
- opea/retriever:latest
|
||||
- opea/reranking:latest
|
||||
- opea/doc-index-retriever:latest
|
||||
|
||||
---
|
||||
|
||||
## Deploy the AgentQnA Application
|
||||
|
||||
### Docker Compose Configuration for AMD GPUs
|
||||
|
||||
To enable GPU support for AMD GPUs, the following configuration is added to the Docker Compose file:
|
||||
|
||||
- compose_vllm.yaml - for vLLM-based application
|
||||
- compose.yaml - for TGI-based
|
||||
|
||||
```yaml
|
||||
shm_size: 1g
|
||||
devices:
|
||||
- /dev/kfd:/dev/kfd
|
||||
- /dev/dri:/dev/dri
|
||||
cap_add:
|
||||
- SYS_PTRACE
|
||||
group_add:
|
||||
- video
|
||||
security_opt:
|
||||
- seccomp:unconfined
|
||||
```
|
||||
|
||||
This configuration forwards all available GPUs to the container. To use a specific GPU, specify its `cardN` and `renderN` device IDs. For example:
|
||||
|
||||
```yaml
|
||||
shm_size: 1g
|
||||
devices:
|
||||
- /dev/kfd:/dev/kfd
|
||||
- /dev/dri/card0:/dev/dri/card0
|
||||
- /dev/dri/render128:/dev/dri/render128
|
||||
cap_add:
|
||||
- SYS_PTRACE
|
||||
group_add:
|
||||
- video
|
||||
security_opt:
|
||||
- seccomp:unconfined
|
||||
```
|
||||
|
||||
**How to Identify GPU Device IDs:**
|
||||
Use AMD GPU driver utilities to determine the correct `cardN` and `renderN` IDs for your GPU.
|
||||
|
||||
### Set deploy environment variables
|
||||
|
||||
#### Setting variables in the operating system environment:
|
||||
|
||||
```bash
|
||||
### Replace the string 'server_address' with your local server IP address
|
||||
export host_ip='server_address'
|
||||
### Replace the string 'your_huggingfacehub_token' with your HuggingFacehub repository access token.
|
||||
export HUGGINGFACEHUB_API_TOKEN='your_huggingfacehub_token'
|
||||
### Replace the string 'your_langchain_api_key' with your LANGCHAIN API KEY.
|
||||
export LANGCHAIN_API_KEY='your_langchain_api_key'
|
||||
export LANGCHAIN_TRACING_V2=""
|
||||
```
|
||||
|
||||
### Start the services:
|
||||
|
||||
#### If you use vLLM
|
||||
|
||||
```bash
|
||||
cd ~/agentqna-install/GenAIExamples/AgentQnA/docker_compose/amd/gpu/rocm
|
||||
bash launch_agent_service_vllm_rocm.sh
|
||||
```
|
||||
|
||||
#### If you use TGI
|
||||
|
||||
```bash
|
||||
cd ~/agentqna-install/GenAIExamples/AgentQnA/docker_compose/amd/gpu/rocm
|
||||
bash launch_agent_service_tgi_rocm.sh
|
||||
```
|
||||
|
||||
All containers should be running and should not restart:
|
||||
|
||||
##### If you use vLLM:
|
||||
|
||||
- dataprep-redis-server
|
||||
- doc-index-retriever-server
|
||||
- embedding-server
|
||||
- rag-agent-endpoint
|
||||
- react-agent-endpoint
|
||||
- redis-vector-db
|
||||
- reranking-tei-xeon-server
|
||||
- retriever-redis-server
|
||||
- sql-agent-endpoint
|
||||
- tei-embedding-server
|
||||
- tei-reranking-server
|
||||
- vllm-service
|
||||
|
||||
##### If you use TGI:
|
||||
|
||||
- dataprep-redis-server
|
||||
- doc-index-retriever-server
|
||||
- embedding-server
|
||||
- rag-agent-endpoint
|
||||
- react-agent-endpoint
|
||||
- redis-vector-db
|
||||
- reranking-tei-xeon-server
|
||||
- retriever-redis-server
|
||||
- sql-agent-endpoint
|
||||
- tei-embedding-server
|
||||
- tei-reranking-server
|
||||
- tgi-service
|
||||
|
||||
---
|
||||
|
||||
## Validate the Services
|
||||
|
||||
### 1. Validate the vLLM/TGI Service
|
||||
|
||||
#### If you use vLLM:
|
||||
|
||||
```bash
|
||||
DATA='{"model": "Intel/neural-chat-7b-v3-3", '\
|
||||
'"messages": [{"role": "user", "content": "What is Deep Learning?"}], "max_tokens": 256}'
|
||||
|
||||
curl http://${HOST_IP}:${VLLM_SERVICE_PORT}/v1/chat/completions \
|
||||
-X POST \
|
||||
-d "$DATA" \
|
||||
-H 'Content-Type: application/json'
|
||||
```
|
||||
|
||||
Check the response from the service. It should be similar to the following JSON:
|
||||
|
||||
```json
|
||||
{
|
||||
"id": "chatcmpl-142f34ef35b64a8db3deedd170fed951",
|
||||
"object": "chat.completion",
|
||||
"created": 1742270316,
|
||||
"model": "Intel/neural-chat-7b-v3-3",
|
||||
"choices": [
|
||||
{
|
||||
"index": 0,
|
||||
"message": {
|
||||
"role": "assistant",
|
||||
"content": "",
|
||||
"tool_calls": []
|
||||
},
|
||||
"logprobs": null,
|
||||
"finish_reason": "length",
|
||||
"stop_reason": null
|
||||
}
|
||||
],
|
||||
"usage": { "prompt_tokens": 66, "total_tokens": 322, "completion_tokens": 256, "prompt_tokens_details": null },
|
||||
"prompt_logprobs": null
|
||||
}
|
||||
```
|
||||
|
||||
If the value of the "choices.message.content" key in the service response contains meaningful text,
|
||||
then the vLLM service is considered to be successfully launched.
|
||||
|
||||
#### If you use TGI:
|
||||
|
||||
```bash
|
||||
DATA='{"inputs":"What is Deep Learning?",'\
|
||||
'"parameters":{"max_new_tokens":256,"do_sample": true}}'
|
||||
|
||||
curl http://${HOST_IP}:${TGI_SERVICE_PORT}/generate \
|
||||
-X POST \
|
||||
-d "$DATA" \
|
||||
-H 'Content-Type: application/json'
|
||||
```
|
||||
|
||||
Check the response from the service. It should be similar to the following JSON:
|
||||
|
||||
```json
|
||||
{
|
||||
"generated_text": " "
|
||||
}
|
||||
```
|
||||
|
||||
If the value of the "generated_text" key in the service response contains meaningful text,
|
||||
then the TGI service is considered to be successfully launched.
|
||||
|
||||
### 2. Validate Agent Services
|
||||
|
||||
#### Validate Rag Agent Service
|
||||
|
||||
```bash
|
||||
export agent_port=${WORKER_RAG_AGENT_PORT}
|
||||
prompt="Tell me about Michael Jackson song Thriller"
|
||||
python3 ~/agentqna-install/GenAIExamples/AgentQnA/tests/test.py --prompt "$prompt" --agent_role "worker" --ext_port $agent_port
|
||||
```
|
||||
|
||||
The response should contain a meaningful answer to the request stored in the "prompt" variable.
|
||||
|
||||
#### Validate Sql Agent Service
|
||||
|
||||
```bash
|
||||
export agent_port=${WORKER_SQL_AGENT_PORT}
|
||||
prompt="How many employees are there in the company?"
|
||||
python3 ~/agentqna-install/GenAIExamples/AgentQnA/tests/test.py --prompt "$prompt" --agent_role "worker" --ext_port $agent_port
|
||||
```
|
||||
|
||||
The answer should be meaningful, for example: "8 employees in the company".
|
||||
|
||||
#### Validate React (Supervisor) Agent Service
|
||||
|
||||
```bash
|
||||
export agent_port=${SUPERVISOR_REACT_AGENT_PORT}
|
||||
python3 ~/agentqna-install/GenAIExamples/AgentQnA/tests/test.py --agent_role "supervisor" --ext_port $agent_port --stream
|
||||
```
|
||||
|
||||
The response should contain "Iron Maiden"
|
||||
|
||||
### 3. Stop application
|
||||
|
||||
#### If you use vLLM
|
||||
|
||||
```bash
|
||||
cd ~/agentqna-install/GenAIExamples/AgentQnA/docker_compose/amd/gpu/rocm
|
||||
bash stop_agent_service_vllm_rocm.sh
|
||||
```
|
||||
|
||||
#### If you use TGI
|
||||
|
||||
```bash
|
||||
cd ~/agentqna-install/GenAIExamples/AgentQnA/docker_compose/amd/gpu/rocm
|
||||
bash stop_agent_service_tgi_rocm.sh
|
||||
```
|
||||
@@ -1,24 +1,26 @@
|
||||
# Copyright (C) 2025 Advanced Micro Devices, Inc.
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
services:
|
||||
tgi-service:
|
||||
image: ghcr.io/huggingface/text-generation-inference:3.0.0-rocm
|
||||
container_name: tgi-service
|
||||
agent-tgi-server:
|
||||
image: ${AGENTQNA_TGI_IMAGE}
|
||||
container_name: agent-tgi-server
|
||||
ports:
|
||||
- "${TGI_SERVICE_PORT-8085}:80"
|
||||
- "${AGENTQNA_TGI_SERVICE_PORT-8085}:80"
|
||||
volumes:
|
||||
- "${MODEL_CACHE:-./data}:/data"
|
||||
- /var/opea/agent-service/:/data
|
||||
environment:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
TGI_LLM_ENDPOINT: "http://${ip_address}:${TGI_SERVICE_PORT}"
|
||||
TGI_LLM_ENDPOINT: "http://${HOST_IP}:${AGENTQNA_TGI_SERVICE_PORT}"
|
||||
HUGGING_FACE_HUB_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
shm_size: 32g
|
||||
shm_size: 1g
|
||||
devices:
|
||||
- /dev/kfd:/dev/kfd
|
||||
- /dev/dri:/dev/dri
|
||||
- /dev/dri/${AGENTQNA_CARD_ID}:/dev/dri/${AGENTQNA_CARD_ID}
|
||||
- /dev/dri/${AGENTQNA_RENDER_ID}:/dev/dri/${AGENTQNA_RENDER_ID}
|
||||
cap_add:
|
||||
- SYS_PTRACE
|
||||
group_add:
|
||||
@@ -29,17 +31,17 @@ services:
|
||||
command: --model-id ${LLM_MODEL_ID} --max-input-length 4096 --max-total-tokens 8192
|
||||
|
||||
worker-rag-agent:
|
||||
image: opea/agent:latest
|
||||
image: opea/agent-langchain:latest
|
||||
container_name: rag-agent-endpoint
|
||||
volumes:
|
||||
- "${TOOLSET_PATH}:/home/user/tools/"
|
||||
# - ${WORKDIR}/GenAIExamples/AgentQnA/docker_image_build/GenAIComps/comps/agent/langchain/:/home/user/comps/agent/langchain/
|
||||
- ${TOOLSET_PATH}:/home/user/tools/
|
||||
ports:
|
||||
- "${WORKER_RAG_AGENT_PORT:-9095}:9095"
|
||||
- "9095:9095"
|
||||
ipc: host
|
||||
environment:
|
||||
ip_address: ${ip_address}
|
||||
strategy: rag_agent_llama
|
||||
with_memory: false
|
||||
recursion_limit: ${recursion_limit_worker}
|
||||
llm_engine: tgi
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
@@ -47,7 +49,7 @@ services:
|
||||
model: ${LLM_MODEL_ID}
|
||||
temperature: ${temperature}
|
||||
max_new_tokens: ${max_new_tokens}
|
||||
stream: false
|
||||
streaming: false
|
||||
tools: /home/user/tools/worker_agent_tools.yaml
|
||||
require_human_feedback: false
|
||||
RETRIEVAL_TOOL_URL: ${RETRIEVAL_TOOL_URL}
|
||||
@@ -59,49 +61,21 @@ services:
|
||||
LANGCHAIN_PROJECT: "opea-worker-agent-service"
|
||||
port: 9095
|
||||
|
||||
worker-sql-agent:
|
||||
image: opea/agent:latest
|
||||
container_name: sql-agent-endpoint
|
||||
volumes:
|
||||
- "${WORKDIR}/tests/Chinook_Sqlite.sqlite:/home/user/chinook-db/Chinook_Sqlite.sqlite:rw"
|
||||
ports:
|
||||
- "${WORKER_SQL_AGENT_PORT:-9096}:9096"
|
||||
ipc: host
|
||||
environment:
|
||||
ip_address: ${ip_address}
|
||||
strategy: sql_agent_llama
|
||||
with_memory: false
|
||||
db_name: ${db_name}
|
||||
db_path: ${db_path}
|
||||
use_hints: false
|
||||
recursion_limit: ${recursion_limit_worker}
|
||||
llm_engine: vllm
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
llm_endpoint_url: ${LLM_ENDPOINT_URL}
|
||||
model: ${LLM_MODEL_ID}
|
||||
temperature: ${temperature}
|
||||
max_new_tokens: ${max_new_tokens}
|
||||
stream: false
|
||||
require_human_feedback: false
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
port: 9096
|
||||
|
||||
supervisor-react-agent:
|
||||
image: opea/agent:latest
|
||||
image: opea/agent-langchain:latest
|
||||
container_name: react-agent-endpoint
|
||||
depends_on:
|
||||
- agent-tgi-server
|
||||
- worker-rag-agent
|
||||
volumes:
|
||||
- "${TOOLSET_PATH}:/home/user/tools/"
|
||||
# - ${WORKDIR}/GenAIExamples/AgentQnA/docker_image_build/GenAIComps/comps/agent/langchain/:/home/user/comps/agent/langchain/
|
||||
- ${TOOLSET_PATH}:/home/user/tools/
|
||||
ports:
|
||||
- "${SUPERVISOR_REACT_AGENT_PORT:-9090}:9090"
|
||||
- "${AGENTQNA_FRONTEND_PORT}:9090"
|
||||
ipc: host
|
||||
environment:
|
||||
ip_address: ${ip_address}
|
||||
strategy: react_llama
|
||||
with_memory: true
|
||||
strategy: react_langgraph
|
||||
recursion_limit: ${recursion_limit_supervisor}
|
||||
llm_engine: tgi
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
@@ -109,7 +83,7 @@ services:
|
||||
model: ${LLM_MODEL_ID}
|
||||
temperature: ${temperature}
|
||||
max_new_tokens: ${max_new_tokens}
|
||||
stream: true
|
||||
streaming: false
|
||||
tools: /home/user/tools/supervisor_agent_tools.yaml
|
||||
require_human_feedback: false
|
||||
no_proxy: ${no_proxy}
|
||||
@@ -118,7 +92,6 @@ services:
|
||||
LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY}
|
||||
LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2}
|
||||
LANGCHAIN_PROJECT: "opea-supervisor-agent-service"
|
||||
CRAG_SERVER: ${CRAG_SERVER}
|
||||
WORKER_AGENT_URL: ${WORKER_AGENT_URL}
|
||||
SQL_AGENT_URL: ${SQL_AGENT_URL}
|
||||
CRAG_SERVER: $CRAG_SERVER
|
||||
WORKER_AGENT_URL: $WORKER_AGENT_URL
|
||||
port: 9090
|
||||
|
||||
@@ -1,128 +0,0 @@
|
||||
# Copyright (C) 2025 Advanced Micro Devices, Inc.
|
||||
|
||||
services:
|
||||
vllm-service:
|
||||
image: ${REGISTRY:-opea}/vllm-rocm:${TAG:-latest}
|
||||
container_name: vllm-service
|
||||
ports:
|
||||
- "${VLLM_SERVICE_PORT:-8081}:8011"
|
||||
environment:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
HF_HUB_DISABLE_PROGRESS_BARS: 1
|
||||
HF_HUB_ENABLE_HF_TRANSFER: 0
|
||||
WILM_USE_TRITON_FLASH_ATTENTION: 0
|
||||
PYTORCH_JIT: 0
|
||||
volumes:
|
||||
- "${MODEL_CACHE:-./data}:/data"
|
||||
shm_size: 20G
|
||||
devices:
|
||||
- /dev/kfd:/dev/kfd
|
||||
- /dev/dri/:/dev/dri/
|
||||
cap_add:
|
||||
- SYS_PTRACE
|
||||
group_add:
|
||||
- video
|
||||
security_opt:
|
||||
- seccomp:unconfined
|
||||
- apparmor=unconfined
|
||||
command: "--model ${VLLM_LLM_MODEL_ID} --swap-space 16 --disable-log-requests --dtype float16 --tensor-parallel-size 4 --host 0.0.0.0 --port 8011 --num-scheduler-steps 1 --distributed-executor-backend \"mp\""
|
||||
ipc: host
|
||||
|
||||
worker-rag-agent:
|
||||
image: opea/agent:latest
|
||||
container_name: rag-agent-endpoint
|
||||
volumes:
|
||||
- ${TOOLSET_PATH}:/home/user/tools/
|
||||
ports:
|
||||
- "${WORKER_RAG_AGENT_PORT:-9095}:9095"
|
||||
ipc: host
|
||||
environment:
|
||||
ip_address: ${ip_address}
|
||||
strategy: rag_agent_llama
|
||||
with_memory: false
|
||||
recursion_limit: ${recursion_limit_worker}
|
||||
llm_engine: vllm
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
llm_endpoint_url: ${LLM_ENDPOINT_URL}
|
||||
model: ${LLM_MODEL_ID}
|
||||
temperature: ${temperature}
|
||||
max_new_tokens: ${max_new_tokens}
|
||||
stream: false
|
||||
tools: /home/user/tools/worker_agent_tools.yaml
|
||||
require_human_feedback: false
|
||||
RETRIEVAL_TOOL_URL: ${RETRIEVAL_TOOL_URL}
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY}
|
||||
LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2}
|
||||
LANGCHAIN_PROJECT: "opea-worker-agent-service"
|
||||
port: 9095
|
||||
|
||||
worker-sql-agent:
|
||||
image: opea/agent:latest
|
||||
container_name: sql-agent-endpoint
|
||||
volumes:
|
||||
- "${WORKDIR}/tests/Chinook_Sqlite.sqlite:/home/user/chinook-db/Chinook_Sqlite.sqlite:rw"
|
||||
ports:
|
||||
- "${WORKER_SQL_AGENT_PORT:-9096}:9096"
|
||||
ipc: host
|
||||
environment:
|
||||
ip_address: ${ip_address}
|
||||
strategy: sql_agent_llama
|
||||
with_memory: false
|
||||
db_name: ${db_name}
|
||||
db_path: ${db_path}
|
||||
use_hints: false
|
||||
recursion_limit: ${recursion_limit_worker}
|
||||
llm_engine: vllm
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
llm_endpoint_url: ${LLM_ENDPOINT_URL}
|
||||
model: ${LLM_MODEL_ID}
|
||||
temperature: ${temperature}
|
||||
max_new_tokens: ${max_new_tokens}
|
||||
stream: false
|
||||
require_human_feedback: false
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
port: 9096
|
||||
|
||||
supervisor-react-agent:
|
||||
image: opea/agent:latest
|
||||
container_name: react-agent-endpoint
|
||||
depends_on:
|
||||
- worker-rag-agent
|
||||
volumes:
|
||||
- ${TOOLSET_PATH}:/home/user/tools/
|
||||
ports:
|
||||
- "${SUPERVISOR_REACT_AGENT_PORT:-9090}:9090"
|
||||
ipc: host
|
||||
environment:
|
||||
ip_address: ${ip_address}
|
||||
strategy: react_llama
|
||||
with_memory: true
|
||||
recursion_limit: ${recursion_limit_supervisor}
|
||||
llm_engine: vllm
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
llm_endpoint_url: ${LLM_ENDPOINT_URL}
|
||||
model: ${LLM_MODEL_ID}
|
||||
temperature: ${temperature}
|
||||
max_new_tokens: ${max_new_tokens}
|
||||
stream: true
|
||||
tools: /home/user/tools/supervisor_agent_tools.yaml
|
||||
require_human_feedback: false
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY}
|
||||
LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2}
|
||||
LANGCHAIN_PROJECT: "opea-supervisor-agent-service"
|
||||
CRAG_SERVER: ${CRAG_SERVER}
|
||||
WORKER_AGENT_URL: ${WORKER_AGENT_URL}
|
||||
SQL_AGENT_URL: ${SQL_AGENT_URL}
|
||||
port: 9090
|
||||
@@ -1,87 +1,47 @@
|
||||
# Copyright (C) 2024 Advanced Micro Devices, Inc.
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
# Before start script:
|
||||
# export host_ip="your_host_ip_or_host_name"
|
||||
# export HUGGINGFACEHUB_API_TOKEN="your_huggingface_api_token"
|
||||
# export LANGCHAIN_API_KEY="your_langchain_api_key"
|
||||
# export LANGCHAIN_TRACING_V2=""
|
||||
|
||||
# Set server hostname or IP address
|
||||
WORKPATH=$(dirname "$PWD")/..
|
||||
export ip_address=${host_ip}
|
||||
export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token}
|
||||
export AGENTQNA_TGI_IMAGE=ghcr.io/huggingface/text-generation-inference:2.3.1-rocm
|
||||
export AGENTQNA_TGI_SERVICE_PORT="8085"
|
||||
|
||||
# Set services IP ports
|
||||
export TGI_SERVICE_PORT="18110"
|
||||
export WORKER_RAG_AGENT_PORT="18111"
|
||||
export WORKER_SQL_AGENT_PORT="18112"
|
||||
export SUPERVISOR_REACT_AGENT_PORT="18113"
|
||||
export CRAG_SERVER_PORT="18114"
|
||||
|
||||
export WORKPATH=$(dirname "$PWD")
|
||||
export WORKDIR=${WORKPATH}/../../../
|
||||
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
|
||||
export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
|
||||
export HF_CACHE_DIR="./data"
|
||||
export MODEL_CACHE="./data"
|
||||
export TOOLSET_PATH=${WORKPATH}/../../../tools/
|
||||
export recursion_limit_worker=12
|
||||
export LLM_ENDPOINT_URL=http://${ip_address}:${TGI_SERVICE_PORT}
|
||||
# LLM related environment variables
|
||||
export AGENTQNA_CARD_ID="card1"
|
||||
export AGENTQNA_RENDER_ID="renderD136"
|
||||
export HF_CACHE_DIR=${HF_CACHE_DIR}
|
||||
ls $HF_CACHE_DIR
|
||||
export LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct"
|
||||
#export NUM_SHARDS=4
|
||||
export LLM_ENDPOINT_URL="http://${ip_address}:${AGENTQNA_TGI_SERVICE_PORT}"
|
||||
export temperature=0.01
|
||||
export max_new_tokens=512
|
||||
export RETRIEVAL_TOOL_URL="http://${ip_address}:8889/v1/retrievaltool"
|
||||
export LANGCHAIN_API_KEY=${LANGCHAIN_API_KEY}
|
||||
export LANGCHAIN_TRACING_V2=${LANGCHAIN_TRACING_V2}
|
||||
export db_name=Chinook
|
||||
export db_path="sqlite:////home/user/chinook-db/Chinook_Sqlite.sqlite"
|
||||
|
||||
# agent related environment variables
|
||||
export AGENTQNA_WORKER_AGENT_SERVICE_PORT="9095"
|
||||
export TOOLSET_PATH=/home/huggingface/datamonsters/amd-opea/GenAIExamples/AgentQnA/tools/
|
||||
echo "TOOLSET_PATH=${TOOLSET_PATH}"
|
||||
export recursion_limit_worker=12
|
||||
export recursion_limit_supervisor=10
|
||||
export CRAG_SERVER=http://${ip_address}:${CRAG_SERVER_PORT}
|
||||
export WORKER_AGENT_URL="http://${ip_address}:${WORKER_RAG_AGENT_PORT}/v1/chat/completions"
|
||||
export SQL_AGENT_URL="http://${ip_address}:${WORKER_SQL_AGENT_PORT}/v1/chat/completions"
|
||||
export HF_CACHE_DIR=${HF_CACHE_DIR}
|
||||
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
|
||||
export no_proxy=${no_proxy}
|
||||
export http_proxy=${http_proxy}
|
||||
export https_proxy=${https_proxy}
|
||||
export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
|
||||
export RERANK_MODEL_ID="BAAI/bge-reranker-base"
|
||||
export WORKER_AGENT_URL="http://${ip_address}:${AGENTQNA_WORKER_AGENT_SERVICE_PORT}/v1/chat/completions"
|
||||
export RETRIEVAL_TOOL_URL="http://${ip_address}:8889/v1/retrievaltool"
|
||||
export CRAG_SERVER=http://${ip_address}:18881
|
||||
|
||||
export AGENTQNA_FRONTEND_PORT="9090"
|
||||
|
||||
#retrieval_tool
|
||||
export TEI_EMBEDDING_ENDPOINT="http://${host_ip}:6006"
|
||||
export TEI_RERANKING_ENDPOINT="http://${host_ip}:8808"
|
||||
export REDIS_URL="redis://${host_ip}:6379"
|
||||
export REDIS_URL="redis://${host_ip}:26379"
|
||||
export INDEX_NAME="rag-redis"
|
||||
export RERANK_TYPE="tei"
|
||||
export MEGA_SERVICE_HOST_IP=${host_ip}
|
||||
export EMBEDDING_SERVICE_HOST_IP=${host_ip}
|
||||
export RETRIEVER_SERVICE_HOST_IP=${host_ip}
|
||||
export RERANK_SERVICE_HOST_IP=${host_ip}
|
||||
export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8889/v1/retrievaltool"
|
||||
export DATAPREP_SERVICE_ENDPOINT="http://${host_ip}:6007/v1/dataprep/ingest"
|
||||
export DATAPREP_GET_FILE_ENDPOINT="http://${host_ip}:6008/v1/dataprep/get"
|
||||
export DATAPREP_DELETE_FILE_ENDPOINT="http://${host_ip}:6009/v1/dataprep/delete"
|
||||
export DATAPREP_SERVICE_ENDPOINT="http://${host_ip}:6007/v1/dataprep"
|
||||
export DATAPREP_GET_FILE_ENDPOINT="http://${host_ip}:6007/v1/dataprep/get_file"
|
||||
export DATAPREP_DELETE_FILE_ENDPOINT="http://${host_ip}:6007/v1/dataprep/delete_file"
|
||||
|
||||
echo ${WORKER_RAG_AGENT_PORT} > ${WORKPATH}/WORKER_RAG_AGENT_PORT_tmp
|
||||
echo ${WORKER_SQL_AGENT_PORT} > ${WORKPATH}/WORKER_SQL_AGENT_PORT_tmp
|
||||
echo ${SUPERVISOR_REACT_AGENT_PORT} > ${WORKPATH}/SUPERVISOR_REACT_AGENT_PORT_tmp
|
||||
echo ${CRAG_SERVER_PORT} > ${WORKPATH}/CRAG_SERVER_PORT_tmp
|
||||
|
||||
echo "Downloading chinook data..."
|
||||
echo Y | rm -R chinook-database
|
||||
git clone https://github.com/lerocha/chinook-database.git
|
||||
echo Y | rm -R ../../../../../AgentQnA/tests/Chinook_Sqlite.sqlite
|
||||
cp chinook-database/ChinookDatabase/DataSources/Chinook_Sqlite.sqlite ../../../../../AgentQnA/tests
|
||||
|
||||
docker compose -f ../../../../../DocIndexRetriever/docker_compose/intel/cpu/xeon/compose.yaml up -d
|
||||
docker compose -f compose.yaml up -d
|
||||
|
||||
n=0
|
||||
until [[ "$n" -ge 100 ]]; do
|
||||
docker logs tgi-service > ${WORKPATH}/tgi_service_start.log
|
||||
if grep -q Connected ${WORKPATH}/tgi_service_start.log; then
|
||||
break
|
||||
fi
|
||||
sleep 10s
|
||||
n=$((n+1))
|
||||
done
|
||||
|
||||
echo "Starting CRAG server"
|
||||
docker run -d --runtime=runc --name=kdd-cup-24-crag-service -p=${CRAG_SERVER_PORT}:8000 docker.io/aicrowd/kdd-cup-24-crag-mock-api:v0
|
||||
|
||||
@@ -1,88 +0,0 @@
|
||||
# Copyright (C) 2024 Advanced Micro Devices, Inc.
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
# Before start script:
|
||||
# export host_ip="your_host_ip_or_host_name"
|
||||
# export HUGGINGFACEHUB_API_TOKEN="your_huggingface_api_token"
|
||||
# export LANGCHAIN_API_KEY="your_langchain_api_key"
|
||||
# export LANGCHAIN_TRACING_V2=""
|
||||
|
||||
# Set server hostname or IP address
|
||||
export ip_address=${host_ip}
|
||||
|
||||
# Set services IP ports
|
||||
export VLLM_SERVICE_PORT="18110"
|
||||
export WORKER_RAG_AGENT_PORT="18111"
|
||||
export WORKER_SQL_AGENT_PORT="18112"
|
||||
export SUPERVISOR_REACT_AGENT_PORT="18113"
|
||||
export CRAG_SERVER_PORT="18114"
|
||||
|
||||
export WORKPATH=$(dirname "$PWD")
|
||||
export WORKDIR=${WORKPATH}/../../../
|
||||
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
|
||||
export VLLM_LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
|
||||
export HF_CACHE_DIR="./data"
|
||||
export MODEL_CACHE="./data"
|
||||
export TOOLSET_PATH=${WORKPATH}/../../../tools/
|
||||
export recursion_limit_worker=12
|
||||
export LLM_ENDPOINT_URL=http://${ip_address}:${VLLM_SERVICE_PORT}
|
||||
export LLM_MODEL_ID=${VLLM_LLM_MODEL_ID}
|
||||
export temperature=0.01
|
||||
export max_new_tokens=512
|
||||
export RETRIEVAL_TOOL_URL="http://${ip_address}:8889/v1/retrievaltool"
|
||||
export LANGCHAIN_API_KEY=${LANGCHAIN_API_KEY}
|
||||
export LANGCHAIN_TRACING_V2=${LANGCHAIN_TRACING_V2}
|
||||
export db_name=Chinook
|
||||
export db_path="sqlite:////home/user/chinook-db/Chinook_Sqlite.sqlite"
|
||||
export recursion_limit_worker=12
|
||||
export recursion_limit_supervisor=10
|
||||
export CRAG_SERVER=http://${ip_address}:${CRAG_SERVER_PORT}
|
||||
export WORKER_AGENT_URL="http://${ip_address}:${WORKER_RAG_AGENT_PORT}/v1/chat/completions"
|
||||
export SQL_AGENT_URL="http://${ip_address}:${WORKER_SQL_AGENT_PORT}/v1/chat/completions"
|
||||
export HF_CACHE_DIR=${HF_CACHE_DIR}
|
||||
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
|
||||
export no_proxy=${no_proxy}
|
||||
export http_proxy=${http_proxy}
|
||||
export https_proxy=${https_proxy}
|
||||
export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
|
||||
export RERANK_MODEL_ID="BAAI/bge-reranker-base"
|
||||
export TEI_EMBEDDING_ENDPOINT="http://${host_ip}:6006"
|
||||
export TEI_RERANKING_ENDPOINT="http://${host_ip}:8808"
|
||||
export REDIS_URL="redis://${host_ip}:6379"
|
||||
export INDEX_NAME="rag-redis"
|
||||
export RERANK_TYPE="tei"
|
||||
export MEGA_SERVICE_HOST_IP=${host_ip}
|
||||
export EMBEDDING_SERVICE_HOST_IP=${host_ip}
|
||||
export RETRIEVER_SERVICE_HOST_IP=${host_ip}
|
||||
export RERANK_SERVICE_HOST_IP=${host_ip}
|
||||
export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8889/v1/retrievaltool"
|
||||
export DATAPREP_SERVICE_ENDPOINT="http://${host_ip}:6007/v1/dataprep/ingest"
|
||||
export DATAPREP_GET_FILE_ENDPOINT="http://${host_ip}:6008/v1/dataprep/get"
|
||||
export DATAPREP_DELETE_FILE_ENDPOINT="http://${host_ip}:6009/v1/dataprep/delete"
|
||||
|
||||
echo ${WORKER_RAG_AGENT_PORT} > ${WORKPATH}/WORKER_RAG_AGENT_PORT_tmp
|
||||
echo ${WORKER_SQL_AGENT_PORT} > ${WORKPATH}/WORKER_SQL_AGENT_PORT_tmp
|
||||
echo ${SUPERVISOR_REACT_AGENT_PORT} > ${WORKPATH}/SUPERVISOR_REACT_AGENT_PORT_tmp
|
||||
echo ${CRAG_SERVER_PORT} > ${WORKPATH}/CRAG_SERVER_PORT_tmp
|
||||
|
||||
echo "Downloading chinook data..."
|
||||
echo Y | rm -R chinook-database
|
||||
git clone https://github.com/lerocha/chinook-database.git
|
||||
echo Y | rm -R ../../../../../AgentQnA/tests/Chinook_Sqlite.sqlite
|
||||
cp chinook-database/ChinookDatabase/DataSources/Chinook_Sqlite.sqlite ../../../../../AgentQnA/tests
|
||||
|
||||
docker compose -f ../../../../../DocIndexRetriever/docker_compose/intel/cpu/xeon/compose.yaml up -d
|
||||
docker compose -f compose_vllm.yaml up -d
|
||||
|
||||
n=0
|
||||
until [[ "$n" -ge 500 ]]; do
|
||||
docker logs vllm-service >& "${WORKPATH}"/vllm-service_start.log
|
||||
if grep -q "Application startup complete" "${WORKPATH}"/vllm-service_start.log; then
|
||||
break
|
||||
fi
|
||||
sleep 20s
|
||||
n=$((n+1))
|
||||
done
|
||||
|
||||
echo "Starting CRAG server"
|
||||
docker run -d --runtime=runc --name=kdd-cup-24-crag-service -p=${CRAG_SERVER_PORT}:8000 docker.io/aicrowd/kdd-cup-24-crag-mock-api:v0
|
||||
@@ -6,7 +6,7 @@
|
||||
WORKPATH=$(dirname "$PWD")/..
|
||||
export ip_address=${host_ip}
|
||||
export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token}
|
||||
export AGENTQNA_TGI_IMAGE=ghcr.io/huggingface/text-generation-inference:2.4.1-rocm
|
||||
export AGENTQNA_TGI_IMAGE=ghcr.io/huggingface/text-generation-inference:2.3.1-rocm
|
||||
export AGENTQNA_TGI_SERVICE_PORT="19001"
|
||||
|
||||
# LLM related environment variables
|
||||
@@ -14,7 +14,7 @@ export AGENTQNA_CARD_ID="card1"
|
||||
export AGENTQNA_RENDER_ID="renderD136"
|
||||
export HF_CACHE_DIR=${HF_CACHE_DIR}
|
||||
ls $HF_CACHE_DIR
|
||||
export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
|
||||
export LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct"
|
||||
export NUM_SHARDS=4
|
||||
export LLM_ENDPOINT_URL="http://${ip_address}:${AGENTQNA_TGI_SERVICE_PORT}"
|
||||
export temperature=0.01
|
||||
@@ -41,22 +41,6 @@ export EMBEDDING_SERVICE_HOST_IP=${host_ip}
|
||||
export RETRIEVER_SERVICE_HOST_IP=${host_ip}
|
||||
export RERANK_SERVICE_HOST_IP=${host_ip}
|
||||
export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8889/v1/retrievaltool"
|
||||
export DATAPREP_SERVICE_ENDPOINT="http://${host_ip}:6007/v1/dataprep/ingest"
|
||||
export DATAPREP_GET_FILE_ENDPOINT="http://${host_ip}:6007/v1/dataprep/get"
|
||||
export DATAPREP_DELETE_FILE_ENDPOINT="http://${host_ip}:6007/v1/dataprep/delete"
|
||||
|
||||
echo "Removing chinook data..."
|
||||
echo Y | rm -R chinook-database
|
||||
if [ -d "chinook-database" ]; then
|
||||
rm -rf chinook-database
|
||||
fi
|
||||
echo "Chinook data removed!"
|
||||
|
||||
echo "Stopping CRAG server"
|
||||
docker rm kdd-cup-24-crag-service --force
|
||||
|
||||
echo "Stopping Agent services"
|
||||
docker compose -f compose.yaml down
|
||||
|
||||
echo "Stopping Retrieval services"
|
||||
docker compose -f ../../../../../DocIndexRetriever/docker_compose/intel/cpu/xeon/compose.yaml down
|
||||
export DATAPREP_SERVICE_ENDPOINT="http://${host_ip}:6007/v1/dataprep"
|
||||
export DATAPREP_GET_FILE_ENDPOINT="http://${host_ip}:6007/v1/dataprep/get_file"
|
||||
export DATAPREP_DELETE_FILE_ENDPOINT="http://${host_ip}:6007/v1/dataprep/delete_file"
|
||||
@@ -1,84 +0,0 @@
|
||||
# Copyright (C) 2024 Advanced Micro Devices, Inc.
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
|
||||
# Before start script:
|
||||
# export host_ip="your_host_ip_or_host_name"
|
||||
# export HUGGINGFACEHUB_API_TOKEN="your_huggingface_api_token"
|
||||
# export LANGCHAIN_API_KEY="your_langchain_api_key"
|
||||
# export LANGCHAIN_TRACING_V2=""
|
||||
|
||||
# Set server hostname or IP address
|
||||
export ip_address=${host_ip}
|
||||
|
||||
# Set services IP ports
|
||||
export VLLM_SERVICE_PORT="18110"
|
||||
export WORKER_RAG_AGENT_PORT="18111"
|
||||
export WORKER_SQL_AGENT_PORT="18112"
|
||||
export SUPERVISOR_REACT_AGENT_PORT="18113"
|
||||
export CRAG_SERVER_PORT="18114"
|
||||
|
||||
export WORKPATH=$(dirname "$PWD")
|
||||
export WORKDIR=${WORKPATH}/../../../
|
||||
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
|
||||
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
|
||||
export VLLM_LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
|
||||
export HF_CACHE_DIR="./data"
|
||||
export MODEL_CACHE="./data"
|
||||
export TOOLSET_PATH=${WORKPATH}/../../../tools/
|
||||
export recursion_limit_worker=12
|
||||
export LLM_ENDPOINT_URL=http://${ip_address}:${VLLM_SERVICE_PORT}
|
||||
export LLM_MODEL_ID=${VLLM_LLM_MODEL_ID}
|
||||
export temperature=0.01
|
||||
export max_new_tokens=512
|
||||
export RETRIEVAL_TOOL_URL="http://${ip_address}:8889/v1/retrievaltool"
|
||||
export LANGCHAIN_API_KEY=${LANGCHAIN_API_KEY}
|
||||
export LANGCHAIN_TRACING_V2=${LANGCHAIN_TRACING_V2}
|
||||
export db_name=Chinook
|
||||
export db_path="sqlite:////home/user/chinook-db/Chinook_Sqlite.sqlite"
|
||||
export recursion_limit_worker=12
|
||||
export recursion_limit_supervisor=10
|
||||
export CRAG_SERVER=http://${ip_address}:${CRAG_SERVER_PORT}
|
||||
export WORKER_AGENT_URL="http://${ip_address}:${WORKER_RAG_AGENT_PORT}/v1/chat/completions"
|
||||
export SQL_AGENT_URL="http://${ip_address}:${WORKER_SQL_AGENT_PORT}/v1/chat/completions"
|
||||
export HF_CACHE_DIR=${HF_CACHE_DIR}
|
||||
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
|
||||
export no_proxy=${no_proxy}
|
||||
export http_proxy=${http_proxy}
|
||||
export https_proxy=${https_proxy}
|
||||
export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
|
||||
export RERANK_MODEL_ID="BAAI/bge-reranker-base"
|
||||
export TEI_EMBEDDING_ENDPOINT="http://${host_ip}:6006"
|
||||
export TEI_RERANKING_ENDPOINT="http://${host_ip}:8808"
|
||||
export REDIS_URL="redis://${host_ip}:6379"
|
||||
export INDEX_NAME="rag-redis"
|
||||
export RERANK_TYPE="tei"
|
||||
export MEGA_SERVICE_HOST_IP=${host_ip}
|
||||
export EMBEDDING_SERVICE_HOST_IP=${host_ip}
|
||||
export RETRIEVER_SERVICE_HOST_IP=${host_ip}
|
||||
export RERANK_SERVICE_HOST_IP=${host_ip}
|
||||
export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8889/v1/retrievaltool"
|
||||
export DATAPREP_SERVICE_ENDPOINT="http://${host_ip}:6007/v1/dataprep/ingest"
|
||||
export DATAPREP_GET_FILE_ENDPOINT="http://${host_ip}:6008/v1/dataprep/get"
|
||||
export DATAPREP_DELETE_FILE_ENDPOINT="http://${host_ip}:6009/v1/dataprep/delete"
|
||||
|
||||
echo ${WORKER_RAG_AGENT_PORT} > ${WORKPATH}/WORKER_RAG_AGENT_PORT_tmp
|
||||
echo ${WORKER_SQL_AGENT_PORT} > ${WORKPATH}/WORKER_SQL_AGENT_PORT_tmp
|
||||
echo ${SUPERVISOR_REACT_AGENT_PORT} > ${WORKPATH}/SUPERVISOR_REACT_AGENT_PORT_tmp
|
||||
echo ${CRAG_SERVER_PORT} > ${WORKPATH}/CRAG_SERVER_PORT_tmp
|
||||
|
||||
echo "Removing chinook data..."
|
||||
echo Y | rm -R chinook-database
|
||||
if [ -d "chinook-database" ]; then
|
||||
rm -rf chinook-database
|
||||
fi
|
||||
echo "Chinook data removed!"
|
||||
|
||||
echo "Stopping CRAG server"
|
||||
docker rm kdd-cup-24-crag-service --force
|
||||
|
||||
echo "Stopping Agent services"
|
||||
docker compose -f compose_vllm.yaml down
|
||||
|
||||
echo "Stopping Retrieval services"
|
||||
docker compose -f ../../../../../DocIndexRetriever/docker_compose/intel/cpu/xeon/compose.yaml down
|
||||
@@ -1,3 +1,100 @@
|
||||
# Single node on-prem deployment with Docker Compose on Xeon Scalable processors
|
||||
|
||||
This example showcases a hierarchical multi-agent system for question-answering applications. To deploy the example on Xeon, OpenAI LLM models via API calls are used. For instructions, refer to the deployment guide [here](../../../../README.md).
|
||||
This example showcases a hierarchical multi-agent system for question-answering applications. We deploy the example on Xeon. For LLMs, we use OpenAI models via API calls. For instructions on using open-source LLMs, please refer to the deployment guide [here](../../../../README.md).
|
||||
|
||||
## Deployment with docker
|
||||
|
||||
1. First, clone this repo.
|
||||
```
|
||||
export WORKDIR=<your-work-directory>
|
||||
cd $WORKDIR
|
||||
git clone https://github.com/opea-project/GenAIExamples.git
|
||||
```
|
||||
2. Set up the environment for this example <br/>
|
||||
|
||||
```
|
||||
# Example: host_ip="192.168.1.1" or export host_ip="External_Public_IP"
|
||||
export host_ip=$(hostname -I | awk '{print $1}')
|
||||
# if you are in a proxy environment, also set the proxy-related environment variables
|
||||
export http_proxy="Your_HTTP_Proxy"
|
||||
export https_proxy="Your_HTTPs_Proxy"
|
||||
# Example: no_proxy="localhost, 127.0.0.1, 192.168.1.1"
|
||||
export no_proxy="Your_No_Proxy"
|
||||
|
||||
export TOOLSET_PATH=$WORKDIR/GenAIExamples/AgentQnA/tools/
|
||||
# OPENAI_API_KEY is required if you want to use OpenAI models
|
||||
export OPENAI_API_KEY=<your-openai-key>
|
||||
```
|
||||
|
||||
3. Deploy the retrieval tool (i.e., DocIndexRetriever mega-service)
|
||||
|
||||
First, launch the mega-service.
|
||||
|
||||
```
|
||||
cd $WORKDIR/GenAIExamples/AgentQnA/retrieval_tool
|
||||
bash launch_retrieval_tool.sh
|
||||
```
|
||||
|
||||
Then, ingest data into the vector database. Here we provide an example. You can ingest your own data.
|
||||
|
||||
```
|
||||
bash run_ingest_data.sh
|
||||
```
|
||||
|
||||
4. Launch Tool service
|
||||
In this example, we will use some of the mock APIs provided in the Meta CRAG KDD Challenge to demonstrate the benefits of gaining additional context from mock knowledge graphs.
|
||||
```
|
||||
docker run -d -p=8080:8000 docker.io/aicrowd/kdd-cup-24-crag-mock-api:v0
|
||||
```
|
||||
5. Launch `Agent` service
|
||||
|
||||
The configurations of the supervisor agent and the worker agent are defined in the docker-compose yaml file. We currently use OpenAI GPT-4o-mini as the LLM in this example, and llama3.1-70B-instruct (served by TGI-Gaudi) in the Gaudi example. To use the OpenAI LLM, run the command below.
|
||||
|
||||
```
|
||||
cd $WORKDIR/GenAIExamples/AgentQnA/docker_compose/intel/cpu/xeon
|
||||
bash launch_agent_service_openai.sh
|
||||
```
|
||||
|
||||
6. [Optional] Build `Agent` docker image if pulling images failed.
|
||||
|
||||
```
|
||||
git clone https://github.com/opea-project/GenAIComps.git
|
||||
cd GenAIComps
|
||||
docker build -t opea/agent-langchain:latest -f comps/agent/langchain/Dockerfile .
|
||||
```
|
||||
|
||||
## Validate services
|
||||
|
||||
First, look at the logs of the agent docker containers:
|
||||
|
||||
```
|
||||
# worker agent
|
||||
docker logs rag-agent-endpoint
|
||||
```
|
||||
|
||||
```
|
||||
# supervisor agent
|
||||
docker logs react-agent-endpoint
|
||||
```
|
||||
|
||||
You should see something like "HTTP server setup successful" if the docker containers are started successfully.
|
||||
|
||||
Second, validate worker agent:
|
||||
|
||||
```
|
||||
curl http://${host_ip}:9095/v1/chat/completions -X POST -H "Content-Type: application/json" -d '{
|
||||
"query": "Most recent album by Taylor Swift"
|
||||
}'
|
||||
```
|
||||
|
||||
Third, validate supervisor agent:
|
||||
|
||||
```
|
||||
curl http://${host_ip}:9090/v1/chat/completions -X POST -H "Content-Type: application/json" -d '{
|
||||
"query": "Most recent album by Taylor Swift"
|
||||
}'
|
||||
```
|
||||
|
||||
## How to register your own tools with agent
|
||||
|
||||
You can take a look at the tools yaml and python files in this example. For more details, please refer to the "Provide your own tools" section in the instructions [here](https://github.com/opea-project/GenAIComps/tree/main/comps/agent/langchain/README.md).
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
|
||||
services:
|
||||
worker-rag-agent:
|
||||
image: opea/agent:latest
|
||||
image: opea/agent-langchain:latest
|
||||
container_name: rag-agent-endpoint
|
||||
volumes:
|
||||
- ${TOOLSET_PATH}:/home/user/tools/
|
||||
@@ -13,14 +13,13 @@ services:
|
||||
environment:
|
||||
ip_address: ${ip_address}
|
||||
strategy: rag_agent
|
||||
with_memory: false
|
||||
recursion_limit: ${recursion_limit_worker}
|
||||
llm_engine: openai
|
||||
OPENAI_API_KEY: ${OPENAI_API_KEY}
|
||||
model: ${model}
|
||||
temperature: ${temperature}
|
||||
max_new_tokens: ${max_new_tokens}
|
||||
stream: false
|
||||
streaming: false
|
||||
tools: /home/user/tools/worker_agent_tools.yaml
|
||||
require_human_feedback: false
|
||||
RETRIEVAL_TOOL_URL: ${RETRIEVAL_TOOL_URL}
|
||||
@@ -32,40 +31,12 @@ services:
|
||||
LANGCHAIN_PROJECT: "opea-worker-agent-service"
|
||||
port: 9095
|
||||
|
||||
worker-sql-agent:
|
||||
image: opea/agent:latest
|
||||
container_name: sql-agent-endpoint
|
||||
volumes:
|
||||
- ${WORKDIR}/GenAIExamples/AgentQnA/tests:/home/user/chinook-db # SQL database
|
||||
ports:
|
||||
- "9096:9096"
|
||||
ipc: host
|
||||
environment:
|
||||
ip_address: ${ip_address}
|
||||
strategy: sql_agent
|
||||
with_memory: false
|
||||
db_name: ${db_name}
|
||||
db_path: ${db_path}
|
||||
use_hints: false
|
||||
recursion_limit: ${recursion_limit_worker}
|
||||
llm_engine: openai
|
||||
OPENAI_API_KEY: ${OPENAI_API_KEY}
|
||||
model: ${model}
|
||||
temperature: 0
|
||||
max_new_tokens: ${max_new_tokens}
|
||||
stream: false
|
||||
require_human_feedback: false
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
port: 9096
|
||||
|
||||
supervisor-react-agent:
|
||||
image: opea/agent:latest
|
||||
image: opea/agent-langchain:latest
|
||||
container_name: react-agent-endpoint
|
||||
depends_on:
|
||||
- worker-rag-agent
|
||||
- worker-sql-agent
|
||||
volumes:
|
||||
- ${TOOLSET_PATH}:/home/user/tools/
|
||||
ports:
|
||||
@@ -73,15 +44,14 @@ services:
|
||||
ipc: host
|
||||
environment:
|
||||
ip_address: ${ip_address}
|
||||
strategy: react_llama
|
||||
with_memory: true
|
||||
strategy: react_langgraph
|
||||
recursion_limit: ${recursion_limit_supervisor}
|
||||
llm_engine: openai
|
||||
OPENAI_API_KEY: ${OPENAI_API_KEY}
|
||||
model: ${model}
|
||||
temperature: ${temperature}
|
||||
max_new_tokens: ${max_new_tokens}
|
||||
stream: true
|
||||
streaming: false
|
||||
tools: /home/user/tools/supervisor_agent_tools.yaml
|
||||
require_human_feedback: false
|
||||
no_proxy: ${no_proxy}
|
||||
@@ -92,23 +62,4 @@ services:
|
||||
LANGCHAIN_PROJECT: "opea-supervisor-agent-service"
|
||||
CRAG_SERVER: $CRAG_SERVER
|
||||
WORKER_AGENT_URL: $WORKER_AGENT_URL
|
||||
SQL_AGENT_URL: $SQL_AGENT_URL
|
||||
port: 9090
|
||||
mock-api:
|
||||
image: docker.io/aicrowd/kdd-cup-24-crag-mock-api:v0
|
||||
container_name: mock-api
|
||||
ports:
|
||||
- "8080:8000"
|
||||
ipc: host
|
||||
agent-ui:
|
||||
image: opea/agent-ui
|
||||
container_name: agent-ui
|
||||
volumes:
|
||||
- ${WORKDIR}/GenAIExamples/AgentQnA/ui/svelte/.env:/home/user/svelte/.env # test db
|
||||
ports:
|
||||
- "5173:5173"
|
||||
ipc: host
|
||||
|
||||
networks:
|
||||
default:
|
||||
driver: bridge
|
||||
|
||||
@@ -0,0 +1,19 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
pushd "../../../../../" > /dev/null
|
||||
source .set_env.sh
|
||||
popd > /dev/null
|
||||
export TOOLSET_PATH=$WORKDIR/GenAIExamples/AgentQnA/tools/
|
||||
export ip_address=$(hostname -I | awk '{print $1}')
|
||||
export recursion_limit_worker=12
|
||||
export recursion_limit_supervisor=10
|
||||
export model="gpt-4o-mini-2024-07-18"
|
||||
export temperature=0
|
||||
export max_new_tokens=4096
|
||||
export OPENAI_API_KEY=${OPENAI_API_KEY}
|
||||
export WORKER_AGENT_URL="http://${ip_address}:9095/v1/chat/completions"
|
||||
export RETRIEVAL_TOOL_URL="http://${ip_address}:8889/v1/retrievaltool"
|
||||
export CRAG_SERVER=http://${ip_address}:8080
|
||||
|
||||
docker compose -f compose_openai.yaml up -d
|
||||
@@ -1,57 +0,0 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
pushd "../../../../../" > /dev/null
|
||||
source .set_env.sh
|
||||
popd > /dev/null
|
||||
|
||||
if [[ -z "${WORKDIR}" ]]; then
|
||||
echo "Please set WORKDIR environment variable"
|
||||
exit 0
|
||||
fi
|
||||
echo "WORKDIR=${WORKDIR}"
|
||||
export TOOLSET_PATH=$WORKDIR/GenAIExamples/AgentQnA/tools/
|
||||
export ip_address=$(hostname -I | awk '{print $1}')
|
||||
export recursion_limit_worker=12
|
||||
export recursion_limit_supervisor=10
|
||||
export model="gpt-4o-mini-2024-07-18"
|
||||
export temperature=0
|
||||
export max_new_tokens=4096
|
||||
export OPENAI_API_KEY=${OPENAI_API_KEY}
|
||||
export WORKER_AGENT_URL="http://${ip_address}:9095/v1/chat/completions"
|
||||
export SQL_AGENT_URL="http://${ip_address}:9096/v1/chat/completions"
|
||||
export RETRIEVAL_TOOL_URL="http://${ip_address}:8889/v1/retrievaltool"
|
||||
export CRAG_SERVER=http://${ip_address}:8080
|
||||
export db_name=Chinook
|
||||
export db_path="sqlite:////home/user/chinook-db/Chinook_Sqlite.sqlite"
|
||||
|
||||
if [ ! -f $WORKDIR/GenAIExamples/AgentQnA/tests/Chinook_Sqlite.sqlite ]; then
|
||||
echo "Download Chinook_Sqlite!"
|
||||
wget -O $WORKDIR/GenAIExamples/AgentQnA/tests/Chinook_Sqlite.sqlite https://github.com/lerocha/chinook-database/releases/download/v1.4.5/Chinook_Sqlite.sqlite
|
||||
fi
|
||||
|
||||
# retriever
|
||||
export host_ip=$(hostname -I | awk '{print $1}')
|
||||
export HF_CACHE_DIR=${HF_CACHE_DIR}
|
||||
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
|
||||
export no_proxy=${no_proxy}
|
||||
export http_proxy=${http_proxy}
|
||||
export https_proxy=${https_proxy}
|
||||
export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
|
||||
export RERANK_MODEL_ID="BAAI/bge-reranker-base"
|
||||
export TEI_EMBEDDING_ENDPOINT="http://${host_ip}:6006"
|
||||
export TEI_RERANKING_ENDPOINT="http://${host_ip}:8808"
|
||||
export REDIS_URL="redis://${host_ip}:6379"
|
||||
export INDEX_NAME="rag-redis"
|
||||
export RERANK_TYPE="tei"
|
||||
export MEGA_SERVICE_HOST_IP=${host_ip}
|
||||
export EMBEDDING_SERVICE_HOST_IP=${host_ip}
|
||||
export RETRIEVER_SERVICE_HOST_IP=${host_ip}
|
||||
export RERANK_SERVICE_HOST_IP=${host_ip}
|
||||
export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8889/v1/retrievaltool"
|
||||
export DATAPREP_SERVICE_ENDPOINT="http://${host_ip}:6007/v1/dataprep/ingest"
|
||||
export DATAPREP_GET_FILE_ENDPOINT="http://${host_ip}:6008/v1/dataprep/get"
|
||||
export DATAPREP_DELETE_FILE_ENDPOINT="http://${host_ip}:6009/v1/dataprep/delete"
|
||||
|
||||
|
||||
export no_proxy="$no_proxy,rag-agent-endpoint,sql-agent-endpoint,react-agent-endpoint,agent-ui"
|
||||
@@ -1,3 +1,105 @@
|
||||
# Single node on-prem deployment AgentQnA on Gaudi
|
||||
|
||||
This example showcases a hierarchical multi-agent system for question-answering applications. To deploy the example on Gaudi using open-source LLMs, refer to the deployment guide [here](../../../../README.md).
|
||||
This example showcases a hierarchical multi-agent system for question-answering applications. We deploy the example on Gaudi using open-source LLMs.
|
||||
For more details, please refer to the deployment guide [here](../../../../README.md).
|
||||
|
||||
## Deployment with docker
|
||||
|
||||
1. First, clone this repo.
|
||||
```
|
||||
export WORKDIR=<your-work-directory>
|
||||
cd $WORKDIR
|
||||
git clone https://github.com/opea-project/GenAIExamples.git
|
||||
```
|
||||
2. Set up the environment for this example <br/>
|
||||
|
||||
```
|
||||
# Example: host_ip="192.168.1.1" or export host_ip="External_Public_IP"
|
||||
export host_ip=$(hostname -I | awk '{print $1}')
|
||||
# if you are in a proxy environment, also set the proxy-related environment variables
|
||||
export http_proxy="Your_HTTP_Proxy"
|
||||
export https_proxy="Your_HTTPs_Proxy"
|
||||
# Example: no_proxy="localhost, 127.0.0.1, 192.168.1.1"
|
||||
export no_proxy="Your_No_Proxy"
|
||||
|
||||
export TOOLSET_PATH=$WORKDIR/GenAIExamples/AgentQnA/tools/
|
||||
# for using open-source llms
|
||||
export HUGGINGFACEHUB_API_TOKEN=<your-HF-token>
|
||||
# Example: export HF_CACHE_DIR=$WORKDIR so that the models do not need to be re-downloaded every time
|
||||
export HF_CACHE_DIR=<directory-where-llms-are-downloaded>
|
||||
|
||||
```
|
||||
|
||||
3. Deploy the retrieval tool (i.e., DocIndexRetriever mega-service)
|
||||
|
||||
First, launch the mega-service.
|
||||
|
||||
```
|
||||
cd $WORKDIR/GenAIExamples/AgentQnA/retrieval_tool
|
||||
bash launch_retrieval_tool.sh
|
||||
```
|
||||
|
||||
Then, ingest data into the vector database. Here we provide an example. You can ingest your own data.
|
||||
|
||||
```
|
||||
bash run_ingest_data.sh
|
||||
```
|
||||
|
||||
4. Launch Tool service
|
||||
In this example, we will use some of the mock APIs provided in the Meta CRAG KDD Challenge to demonstrate the benefits of gaining additional context from mock knowledge graphs.
|
||||
```
|
||||
docker run -d -p=8080:8000 docker.io/aicrowd/kdd-cup-24-crag-mock-api:v0
|
||||
```
|
||||
5. Launch `Agent` service
|
||||
|
||||
To use open-source LLMs on Gaudi2, run the commands below.
|
||||
|
||||
```
|
||||
cd $WORKDIR/GenAIExamples/AgentQnA/docker_compose/intel/hpu/gaudi
|
||||
bash launch_tgi_gaudi.sh
|
||||
bash launch_agent_service_tgi_gaudi.sh
|
||||
```
|
||||
|
||||
6. [Optional] Build `Agent` docker image if pulling images failed.
|
||||
|
||||
```
|
||||
git clone https://github.com/opea-project/GenAIComps.git
|
||||
cd GenAIComps
|
||||
docker build -t opea/agent-langchain:latest -f comps/agent/langchain/Dockerfile .
|
||||
```
|
||||
|
||||
## Validate services
|
||||
|
||||
First, look at the logs of the agent docker containers:
|
||||
|
||||
```
|
||||
# worker agent
|
||||
docker logs rag-agent-endpoint
|
||||
```
|
||||
|
||||
```
|
||||
# supervisor agent
|
||||
docker logs react-agent-endpoint
|
||||
```
|
||||
|
||||
You should see something like "HTTP server setup successful" if the docker containers are started successfully.
|
||||
|
||||
Second, validate worker agent:
|
||||
|
||||
```
|
||||
curl http://${host_ip}:9095/v1/chat/completions -X POST -H "Content-Type: application/json" -d '{
|
||||
"query": "Most recent album by Taylor Swift"
|
||||
}'
|
||||
```
|
||||
|
||||
Third, validate supervisor agent:
|
||||
|
||||
```
|
||||
curl http://${host_ip}:9090/v1/chat/completions -X POST -H "Content-Type: application/json" -d '{
|
||||
"query": "Most recent album by Taylor Swift"
|
||||
}'
|
||||
```
|
||||
|
||||
## How to register your own tools with agent
|
||||
|
||||
You can take a look at the tools yaml and python files in this example. For more details, please refer to the "Provide your own tools" section in the instructions [here](https://github.com/opea-project/GenAIComps/tree/main/comps/agent/langchain/README.md).
|
||||
|
||||
@@ -1,93 +0,0 @@
|
||||
# Copyright (C) 2025 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
services:
|
||||
tei-embedding-service:
|
||||
command: --model-id ${EMBEDDING_MODEL_ID} --auto-truncate --otlp-endpoint $OTEL_EXPORTER_OTLP_TRACES_ENDPOINT
|
||||
tei-reranking-service:
|
||||
command: --model-id ${RERANK_MODEL_ID} --auto-truncate --otlp-endpoint $OTEL_EXPORTER_OTLP_TRACES_ENDPOINT
|
||||
jaeger:
|
||||
image: jaegertracing/all-in-one:1.67.0
|
||||
container_name: jaeger
|
||||
ports:
|
||||
- "16686:16686"
|
||||
- "4317:4317"
|
||||
- "4318:4318"
|
||||
- "9411:9411"
|
||||
ipc: host
|
||||
environment:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
COLLECTOR_ZIPKIN_HOST_PORT: 9411
|
||||
restart: unless-stopped
|
||||
prometheus:
|
||||
image: prom/prometheus:v2.52.0
|
||||
container_name: prometheus
|
||||
user: root
|
||||
volumes:
|
||||
- ./prometheus.yaml:/etc/prometheus/prometheus.yaml
|
||||
- ./prometheus_data:/prometheus
|
||||
command:
|
||||
- '--config.file=/etc/prometheus/prometheus.yaml'
|
||||
ports:
|
||||
- '9091:9090'
|
||||
ipc: host
|
||||
restart: unless-stopped
|
||||
grafana:
|
||||
image: grafana/grafana:11.0.0
|
||||
container_name: grafana
|
||||
volumes:
|
||||
- ./grafana_data:/var/lib/grafana
|
||||
- ./grafana/dashboards:/var/lib/grafana/dashboards
|
||||
- ./grafana/provisioning:/etc/grafana/provisioning
|
||||
user: root
|
||||
environment:
|
||||
GF_SECURITY_ADMIN_PASSWORD: admin
|
||||
GF_RENDERING_CALLBACK_URL: http://grafana:3000/
|
||||
GF_LOG_FILTERS: rendering:debug
|
||||
depends_on:
|
||||
- prometheus
|
||||
ports:
|
||||
- '3000:3000'
|
||||
ipc: host
|
||||
restart: unless-stopped
|
||||
node-exporter:
|
||||
image: prom/node-exporter
|
||||
container_name: node-exporter
|
||||
volumes:
|
||||
- /proc:/host/proc:ro
|
||||
- /sys:/host/sys:ro
|
||||
- /:/rootfs:ro
|
||||
command:
|
||||
- '--path.procfs=/host/proc'
|
||||
- '--path.sysfs=/host/sys'
|
||||
- --collector.filesystem.ignored-mount-points
|
||||
- "^/(sys|proc|dev|host|etc|rootfs/var/lib/docker/containers|rootfs/var/lib/docker/overlay2|rootfs/run/docker/netns|rootfs/var/lib/docker/aufs)($$|/)"
|
||||
ports:
|
||||
- 9100:9100
|
||||
restart: always
|
||||
deploy:
|
||||
mode: global
|
||||
gaudi-exporter:
|
||||
image: vault.habana.ai/gaudi-metric-exporter/metric-exporter:1.19.2-32
|
||||
container_name: gaudi-exporter
|
||||
volumes:
|
||||
- /proc:/host/proc:ro
|
||||
- /sys:/host/sys:ro
|
||||
- /:/rootfs:ro
|
||||
- /dev:/dev
|
||||
ports:
|
||||
- 41612:41611
|
||||
restart: always
|
||||
deploy:
|
||||
mode: global
|
||||
worker-rag-agent:
|
||||
environment:
|
||||
- TELEMETRY_ENDPOINT=${TELEMETRY_ENDPOINT}
|
||||
worker-sql-agent:
|
||||
environment:
|
||||
- TELEMETRY_ENDPOINT=${TELEMETRY_ENDPOINT}
|
||||
supervisor-react-agent:
|
||||
environment:
|
||||
- TELEMETRY_ENDPOINT=${TELEMETRY_ENDPOINT}
|
||||
@@ -1,9 +0,0 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
services:
|
||||
supervisor-react-agent:
|
||||
environment:
|
||||
- tools=/home/user/tools/supervisor_agent_webtools.yaml
|
||||
- GOOGLE_CSE_ID=${GOOGLE_CSE_ID}
|
||||
- GOOGLE_API_KEY=${GOOGLE_API_KEY}
|
||||
@@ -3,9 +3,10 @@
|
||||
|
||||
services:
|
||||
worker-rag-agent:
|
||||
image: opea/agent:latest
|
||||
image: opea/agent-langchain:latest
|
||||
container_name: rag-agent-endpoint
|
||||
volumes:
|
||||
# - ${WORKDIR}/GenAIExamples/AgentQnA/docker_image_build/GenAIComps/comps/agent/langchain/:/home/user/comps/agent/langchain/
|
||||
- ${TOOLSET_PATH}:/home/user/tools/
|
||||
ports:
|
||||
- "9095:9095"
|
||||
@@ -13,15 +14,14 @@ services:
|
||||
environment:
|
||||
ip_address: ${ip_address}
|
||||
strategy: rag_agent_llama
|
||||
with_memory: false
|
||||
recursion_limit: ${recursion_limit_worker}
|
||||
llm_engine: vllm
|
||||
llm_engine: tgi
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
llm_endpoint_url: ${LLM_ENDPOINT_URL}
|
||||
model: ${LLM_MODEL_ID}
|
||||
temperature: ${temperature}
|
||||
max_new_tokens: ${max_new_tokens}
|
||||
stream: false
|
||||
streaming: false
|
||||
tools: /home/user/tools/worker_agent_tools.yaml
|
||||
require_human_feedback: false
|
||||
RETRIEVAL_TOOL_URL: ${RETRIEVAL_TOOL_URL}
|
||||
@@ -33,42 +33,14 @@ services:
|
||||
LANGCHAIN_PROJECT: "opea-worker-agent-service"
|
||||
port: 9095
|
||||
|
||||
worker-sql-agent:
|
||||
image: opea/agent:latest
|
||||
container_name: sql-agent-endpoint
|
||||
volumes:
|
||||
- ${WORKDIR}/GenAIExamples/AgentQnA/tests:/home/user/chinook-db # test db
|
||||
ports:
|
||||
- "9096:9096"
|
||||
ipc: host
|
||||
environment:
|
||||
ip_address: ${ip_address}
|
||||
strategy: sql_agent_llama
|
||||
with_memory: false
|
||||
db_name: ${db_name}
|
||||
db_path: ${db_path}
|
||||
use_hints: false
|
||||
recursion_limit: ${recursion_limit_worker}
|
||||
llm_engine: vllm
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
llm_endpoint_url: ${LLM_ENDPOINT_URL}
|
||||
model: ${LLM_MODEL_ID}
|
||||
temperature: ${temperature}
|
||||
max_new_tokens: ${max_new_tokens}
|
||||
stream: false
|
||||
require_human_feedback: false
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
port: 9096
|
||||
|
||||
supervisor-react-agent:
|
||||
image: opea/agent:latest
|
||||
image: opea/agent-langchain:latest
|
||||
container_name: react-agent-endpoint
|
||||
depends_on:
|
||||
- worker-rag-agent
|
||||
- worker-sql-agent
|
||||
volumes:
|
||||
# - ${WORKDIR}/GenAIExamples/AgentQnA/docker_image_build/GenAIComps/comps/agent/langchain/:/home/user/comps/agent/langchain/
|
||||
- ${TOOLSET_PATH}:/home/user/tools/
|
||||
ports:
|
||||
- "9090:9090"
|
||||
@@ -76,15 +48,14 @@ services:
|
||||
environment:
|
||||
ip_address: ${ip_address}
|
||||
strategy: react_llama
|
||||
with_memory: true
|
||||
recursion_limit: ${recursion_limit_supervisor}
|
||||
llm_engine: vllm
|
||||
llm_engine: tgi
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
llm_endpoint_url: ${LLM_ENDPOINT_URL}
|
||||
model: ${LLM_MODEL_ID}
|
||||
temperature: ${temperature}
|
||||
max_new_tokens: ${max_new_tokens}
|
||||
stream: true
|
||||
streaming: false
|
||||
tools: /home/user/tools/supervisor_agent_tools.yaml
|
||||
require_human_feedback: false
|
||||
no_proxy: ${no_proxy}
|
||||
@@ -95,49 +66,4 @@ services:
|
||||
LANGCHAIN_PROJECT: "opea-supervisor-agent-service"
|
||||
CRAG_SERVER: $CRAG_SERVER
|
||||
WORKER_AGENT_URL: $WORKER_AGENT_URL
|
||||
SQL_AGENT_URL: $SQL_AGENT_URL
|
||||
port: 9090
|
||||
mock-api:
|
||||
image: docker.io/aicrowd/kdd-cup-24-crag-mock-api:v0
|
||||
container_name: mock-api
|
||||
ports:
|
||||
- "8080:8000"
|
||||
ipc: host
|
||||
agent-ui:
|
||||
image: opea/agent-ui
|
||||
container_name: agent-ui
|
||||
volumes:
|
||||
- ${WORKDIR}/GenAIExamples/AgentQnA/ui/svelte/.env:/home/user/svelte/.env
|
||||
environment:
|
||||
host_ip: ${host_ip}
|
||||
ports:
|
||||
- "5173:5173"
|
||||
ipc: host
|
||||
vllm-service:
|
||||
image: ${REGISTRY:-opea}/vllm-gaudi:${TAG:-latest}
|
||||
container_name: vllm-gaudi-server
|
||||
ports:
|
||||
- "8086:8000"
|
||||
volumes:
|
||||
- "./data:/data"
|
||||
environment:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
HABANA_VISIBLE_DEVICES: all
|
||||
OMPI_MCA_btl_vader_single_copy_mechanism: none
|
||||
LLM_MODEL_ID: ${LLM_MODEL_ID}
|
||||
VLLM_TORCH_PROFILER_DIR: "/mnt"
|
||||
VLLM_SKIP_WARMUP: true
|
||||
PT_HPU_ENABLE_LAZY_COLLECTIVES: true
|
||||
healthcheck:
|
||||
test: ["CMD-SHELL", "curl -f http://$host_ip:8086/health || exit 1"]
|
||||
interval: 10s
|
||||
timeout: 10s
|
||||
retries: 100
|
||||
runtime: habana
|
||||
cap_add:
|
||||
- SYS_NICE
|
||||
ipc: host
|
||||
command: --model $LLM_MODEL_ID --tensor-parallel-size 4 --host 0.0.0.0 --port 8000 --block-size 128 --max-num-seqs 256 --max-seq_len-to-capture 16384
|
||||
|
||||
@@ -1,10 +0,0 @@
|
||||
# Copyright (C) 2025 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
rm *.json
|
||||
wget https://raw.githubusercontent.com/opea-project/GenAIEval/refs/heads/main/evals/benchmark/grafana/chatqna_megaservice_grafana.json
|
||||
mv chatqna_megaservice_grafana.json agentqna_microervices_grafana.json
|
||||
wget https://raw.githubusercontent.com/opea-project/GenAIEval/refs/heads/main/evals/benchmark/grafana/vllm_grafana.json
|
||||
wget https://raw.githubusercontent.com/opea-project/GenAIEval/refs/heads/main/evals/benchmark/grafana/tgi_grafana.json
|
||||
wget https://raw.githubusercontent.com/opea-project/GenAIEval/refs/heads/main/evals/benchmark/grafana/node_grafana.json
|
||||
wget https://raw.githubusercontent.com/opea-project/GenAIEval/refs/heads/main/evals/benchmark/grafana/gaudi_grafana.json
|
||||
@@ -1,14 +0,0 @@
|
||||
# Copyright (C) 2025 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: 1
|
||||
|
||||
providers:
|
||||
- name: 'default'
|
||||
orgId: 1
|
||||
folder: ''
|
||||
type: file
|
||||
disableDeletion: false
|
||||
updateIntervalSeconds: 10 #how often Grafana will scan for changed dashboards
|
||||
options:
|
||||
path: /var/lib/grafana/dashboards
|
||||
@@ -1,54 +0,0 @@
|
||||
# Copyright (C) 2025 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
# config file version
|
||||
apiVersion: 1
|
||||
|
||||
# list of datasources that should be deleted from the database
|
||||
deleteDatasources:
|
||||
- name: Prometheus
|
||||
orgId: 1
|
||||
|
||||
# list of datasources to insert/update depending
|
||||
# what's available in the database
|
||||
datasources:
|
||||
# <string, required> name of the datasource. Required
|
||||
- name: Prometheus
|
||||
# <string, required> datasource type. Required
|
||||
type: prometheus
|
||||
# <string, required> access mode. direct or proxy. Required
|
||||
access: proxy
|
||||
# <int> org id. will default to orgId 1 if not specified
|
||||
orgId: 1
|
||||
# <string> url
|
||||
url: http://prometheus:9090
|
||||
# <string> database password, if used
|
||||
password:
|
||||
# <string> database user, if used
|
||||
user:
|
||||
# <string> database name, if used
|
||||
database:
|
||||
# <bool> enable/disable basic auth
|
||||
basicAuth: false
|
||||
# <string> basic auth username, if used
|
||||
basicAuthUser:
|
||||
# <string> basic auth password, if used
|
||||
basicAuthPassword:
|
||||
# <bool> enable/disable with credentials headers
|
||||
withCredentials:
|
||||
# <bool> mark as default datasource. Max one per org
|
||||
isDefault: true
|
||||
# <map> fields that will be converted to json and stored in json_data
|
||||
jsonData:
|
||||
httpMethod: GET
|
||||
graphiteVersion: "1.1"
|
||||
tlsAuth: false
|
||||
tlsAuthWithCACert: false
|
||||
# <string> json object of data that will be encrypted.
|
||||
secureJsonData:
|
||||
tlsCACert: "..."
|
||||
tlsClientCert: "..."
|
||||
tlsClientKey: "..."
|
||||
version: 1
|
||||
# <bool> allow users to edit datasources from the UI.
|
||||
editable: true
|
||||
@@ -0,0 +1,32 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
pushd "../../../../../" > /dev/null
|
||||
source .set_env.sh
|
||||
popd > /dev/null
|
||||
WORKPATH=$(dirname "$PWD")/..
|
||||
# export WORKDIR=$WORKPATH/../../
|
||||
echo "WORKDIR=${WORKDIR}"
|
||||
export ip_address=$(hostname -I | awk '{print $1}')
|
||||
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
|
||||
|
||||
# LLM related environment variables
|
||||
export HF_CACHE_DIR=${HF_CACHE_DIR}
|
||||
ls $HF_CACHE_DIR
|
||||
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
|
||||
export LLM_MODEL_ID="meta-llama/Meta-Llama-3.1-70B-Instruct"
|
||||
export NUM_SHARDS=4
|
||||
export LLM_ENDPOINT_URL="http://${ip_address}:8085"
|
||||
export temperature=0.01
|
||||
export max_new_tokens=4096
|
||||
|
||||
# agent related environment variables
|
||||
export TOOLSET_PATH=$WORKDIR/GenAIExamples/AgentQnA/tools/
|
||||
echo "TOOLSET_PATH=${TOOLSET_PATH}"
|
||||
export recursion_limit_worker=12
|
||||
export recursion_limit_supervisor=10
|
||||
export WORKER_AGENT_URL="http://${ip_address}:9095/v1/chat/completions"
|
||||
export RETRIEVAL_TOOL_URL="http://${ip_address}:8889/v1/retrievaltool"
|
||||
export CRAG_SERVER=http://${ip_address}:8080
|
||||
|
||||
docker compose -f compose.yaml up -d
|
||||
25
AgentQnA/docker_compose/intel/hpu/gaudi/launch_tgi_gaudi.sh
Normal file
25
AgentQnA/docker_compose/intel/hpu/gaudi/launch_tgi_gaudi.sh
Normal file
@@ -0,0 +1,25 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
# LLM related environment variables
|
||||
export HF_CACHE_DIR=${HF_CACHE_DIR}
|
||||
ls $HF_CACHE_DIR
|
||||
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
|
||||
export LLM_MODEL_ID="meta-llama/Meta-Llama-3.1-70B-Instruct"
|
||||
export NUM_SHARDS=4
|
||||
|
||||
docker compose -f tgi_gaudi.yaml up -d
|
||||
|
||||
# Give the container a moment to start before polling its logs.
sleep 5s
echo "Waiting tgi gaudi ready"
n=0
ready=false
# Poll the tgi-server log (at most 100 times, 5s apart) for the "Connected"
# marker. The flag is set explicitly so a timeout can be told apart from
# success after the loop.
until [[ "$n" -ge 100 ]] || [[ $ready == true ]]; do
    docker logs tgi-server &> tgi-gaudi-service.log
    n=$((n+1))
    if grep -q Connected tgi-gaudi-service.log; then
        ready=true
    else
        sleep 5s
    fi
done
if [[ $ready != true ]]; then
    echo "tgi-server did not report Connected after ${n} checks; see tgi-gaudi-service.log" >&2
    # `return` works when this script is sourced, `exit` when it is executed.
    return 1 2>/dev/null || exit 1
fi
sleep 5s
echo "Service started successfully"
|
||||
@@ -1,55 +0,0 @@
|
||||
# Copyright (C) 2025 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
global:
|
||||
scrape_interval: 5s
|
||||
external_labels:
|
||||
monitor: "my-monitor"
|
||||
scrape_configs:
|
||||
- job_name: "prometheus"
|
||||
static_configs:
|
||||
- targets: ["prometheus:9090"]
|
||||
- job_name: "vllm"
|
||||
metrics_path: /metrics
|
||||
static_configs:
|
||||
- targets: ["vllm-gaudi-server:8000"]
|
||||
- job_name: "tgi"
|
||||
metrics_path: /metrics
|
||||
static_configs:
|
||||
- targets: ["tgi-gaudi-server:80"]
|
||||
- job_name: "tei-embedding"
|
||||
metrics_path: /metrics
|
||||
static_configs:
|
||||
- targets: ["tei-embedding-server:80"]
|
||||
- job_name: "tei-reranking"
|
||||
metrics_path: /metrics
|
||||
static_configs:
|
||||
- targets: ["tei-reranking-server:80"]
|
||||
- job_name: "retriever"
|
||||
metrics_path: /metrics
|
||||
static_configs:
|
||||
- targets: ["retriever:7000"]
|
||||
- job_name: "dataprep-redis-service"
|
||||
metrics_path: /metrics
|
||||
static_configs:
|
||||
- targets: ["dataprep-redis-service:5000"]
|
||||
- job_name: "prometheus-node-exporter"
|
||||
metrics_path: /metrics
|
||||
static_configs:
|
||||
- targets: ["node-exporter:9100"]
|
||||
- job_name: "prometheus-gaudi-exporter"
|
||||
metrics_path: /metrics
|
||||
static_configs:
|
||||
- targets: ["gaudi-exporter:41611"]
|
||||
- job_name: "supervisor-react-agent"
|
||||
metrics_path: /metrics
|
||||
static_configs:
|
||||
- targets: ["react-agent-endpoint:9090"]
|
||||
- job_name: "worker-rag-agent"
|
||||
metrics_path: /metrics
|
||||
static_configs:
|
||||
- targets: ["rag-agent-endpoint:9095"]
|
||||
- job_name: "worker-sql-agent"
|
||||
metrics_path: /metrics
|
||||
static_configs:
|
||||
- targets: ["sql-agent-endpoint:9096"]
|
||||
@@ -1,72 +0,0 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
pushd "../../../../../" > /dev/null
|
||||
source .set_env.sh
|
||||
popd > /dev/null
|
||||
WORKPATH=$(dirname "$PWD")/..
|
||||
# export WORKDIR=$WORKPATH/../../
|
||||
# WORKDIR is the root for the paths built further down, so stop here if it is
# missing. Report failure with a non-zero status: `return` keeps the caller's
# shell alive when this file is loaded with `source`, and `exit` covers the
# case where it is executed directly.
if [[ -z "${WORKDIR}" ]]; then
    echo "Please set WORKDIR environment variable" >&2
    return 1 2>/dev/null || exit 1
fi
|
||||
echo "WORKDIR=${WORKDIR}"
|
||||
export ip_address=$(hostname -I | awk '{print $1}')
|
||||
|
||||
# LLM related environment variables
|
||||
export HF_CACHE_DIR=${HF_CACHE_DIR}
|
||||
ls $HF_CACHE_DIR
|
||||
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
|
||||
export HF_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
|
||||
export LLM_MODEL_ID="meta-llama/Llama-3.3-70B-Instruct"
|
||||
export NUM_SHARDS=4
|
||||
export LLM_ENDPOINT_URL="http://${ip_address}:8086"
|
||||
export temperature=0
|
||||
export max_new_tokens=4096
|
||||
|
||||
# agent related environment variables
|
||||
export TOOLSET_PATH=$WORKDIR/GenAIExamples/AgentQnA/tools/
|
||||
echo "TOOLSET_PATH=${TOOLSET_PATH}"
|
||||
export recursion_limit_worker=12
|
||||
export recursion_limit_supervisor=10
|
||||
export WORKER_AGENT_URL="http://${ip_address}:9095/v1/chat/completions"
|
||||
export SQL_AGENT_URL="http://${ip_address}:9096/v1/chat/completions"
|
||||
export RETRIEVAL_TOOL_URL="http://${ip_address}:8889/v1/retrievaltool"
|
||||
export CRAG_SERVER=http://${ip_address}:8080
|
||||
|
||||
export db_name=Chinook
|
||||
export db_path="sqlite:////home/user/chinook-db/Chinook_Sqlite.sqlite"
|
||||
if [ ! -f $WORKDIR/GenAIExamples/AgentQnA/tests/Chinook_Sqlite.sqlite ]; then
|
||||
echo "Download Chinook_Sqlite!"
|
||||
wget -O $WORKDIR/GenAIExamples/AgentQnA/tests/Chinook_Sqlite.sqlite https://github.com/lerocha/chinook-database/releases/download/v1.4.5/Chinook_Sqlite.sqlite
|
||||
fi
|
||||
|
||||
# configure agent ui
|
||||
echo "AGENT_URL = 'http://$ip_address:9090/v1/chat/completions'" | tee ${WORKDIR}/GenAIExamples/AgentQnA/ui/svelte/.env
|
||||
|
||||
# retriever
|
||||
export host_ip=$(hostname -I | awk '{print $1}')
|
||||
export no_proxy=${no_proxy}
|
||||
export http_proxy=${http_proxy}
|
||||
export https_proxy=${https_proxy}
|
||||
export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
|
||||
export RERANK_MODEL_ID="BAAI/bge-reranker-base"
|
||||
export TEI_EMBEDDING_ENDPOINT="http://${host_ip}:6006"
|
||||
export TEI_RERANKING_ENDPOINT="http://${host_ip}:8808"
|
||||
export REDIS_URL="redis://${host_ip}:6379"
|
||||
export INDEX_NAME="rag-redis"
|
||||
export RERANK_TYPE="tei"
|
||||
export MEGA_SERVICE_HOST_IP=${host_ip}
|
||||
export EMBEDDING_SERVICE_HOST_IP=${host_ip}
|
||||
export RETRIEVER_SERVICE_HOST_IP=${host_ip}
|
||||
export RERANK_SERVICE_HOST_IP=${host_ip}
|
||||
export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8889/v1/retrievaltool"
|
||||
export DATAPREP_SERVICE_ENDPOINT="http://${host_ip}:6007/v1/dataprep/ingest"
|
||||
export DATAPREP_GET_FILE_ENDPOINT="http://${host_ip}:6008/v1/dataprep/get"
|
||||
export DATAPREP_DELETE_FILE_ENDPOINT="http://${host_ip}:6009/v1/dataprep/delete"
|
||||
# Set OpenTelemetry Tracing Endpoint
|
||||
export JAEGER_IP=$(ip route get 8.8.8.8 | grep -oP 'src \K[^ ]+')
|
||||
export OTEL_EXPORTER_OTLP_TRACES_ENDPOINT=grpc://$JAEGER_IP:4317
|
||||
export TELEMETRY_ENDPOINT=http://$JAEGER_IP:4318/v1/traces
|
||||
|
||||
export no_proxy="$no_proxy,rag-agent-endpoint,sql-agent-endpoint,react-agent-endpoint,agent-ui,vllm-gaudi-server,jaeger,grafana,prometheus,node-exporter,gaudi-exporter,127.0.0.1,localhost,0.0.0.0,$host_ip,,$JAEGER_IP"
|
||||
@@ -3,7 +3,7 @@
|
||||
|
||||
services:
|
||||
tgi-server:
|
||||
image: ghcr.io/huggingface/tgi-gaudi:2.3.1
|
||||
image: ghcr.io/huggingface/tgi-gaudi:2.0.6
|
||||
container_name: tgi-server
|
||||
ports:
|
||||
- "8085:80"
|
||||
|
||||
@@ -2,27 +2,12 @@
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
services:
|
||||
agent:
|
||||
agent-langchain:
|
||||
build:
|
||||
context: GenAIComps
|
||||
dockerfile: comps/agent/src/Dockerfile
|
||||
dockerfile: comps/agent/langchain/Dockerfile
|
||||
args:
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
no_proxy: ${no_proxy}
|
||||
image: ${REGISTRY:-opea}/agent:${TAG:-latest}
|
||||
agent-ui:
|
||||
build:
|
||||
context: ../ui
|
||||
dockerfile: ./docker/Dockerfile
|
||||
extends: agent
|
||||
image: ${REGISTRY:-opea}/agent-ui:${TAG:-latest}
|
||||
vllm-rocm:
|
||||
build:
|
||||
args:
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
no_proxy: ${no_proxy}
|
||||
context: GenAIComps
|
||||
dockerfile: comps/third_parties/vllm/src/Dockerfile.amd_gpu
|
||||
image: ${REGISTRY:-opea}/vllm-rocm:${TAG:-latest}
|
||||
image: ${REGISTRY:-opea}/agent-langchain:${TAG:-latest}
|
||||
|
||||
@@ -1,11 +0,0 @@
|
||||
# Deploy AgentQnA on Kubernetes cluster
|
||||
|
||||
- You should have Helm (version >= 3.15) installed. Refer to the [Helm Installation Guide](https://helm.sh/docs/intro/install/) for more information.
|
||||
- For more deploy options, refer to [helm charts README](https://github.com/opea-project/GenAIInfra/tree/main/helm-charts#readme).
|
||||
|
||||
## Deploy on Gaudi
|
||||
|
||||
```
|
||||
export HFTOKEN="insert-your-huggingface-token-here"
|
||||
helm install agentqna oci://ghcr.io/opea-project/charts/agentqna --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} -f gaudi-values.yaml
|
||||
```
|
||||
@@ -1,16 +0,0 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
# Accelerate inferencing in heaviest components to improve performance
|
||||
# by overriding their subchart values
|
||||
|
||||
vllm:
|
||||
enabled: true
|
||||
image:
|
||||
repository: opea/vllm-gaudi
|
||||
supervisor:
|
||||
llm_endpoint_url: http://{{ .Release.Name }}-vllm
|
||||
ragagent:
|
||||
llm_endpoint_url: http://{{ .Release.Name }}-vllm
|
||||
sqlagent:
|
||||
llm_endpoint_url: http://{{ .Release.Name }}-vllm
|
||||
@@ -53,7 +53,7 @@ def main():
|
||||
host_ip = args.host_ip
|
||||
port = args.port
|
||||
proxies = {"http": ""}
|
||||
url = "http://{host_ip}:{port}/v1/dataprep/ingest".format(host_ip=host_ip, port=port)
|
||||
url = "http://{host_ip}:{port}/v1/dataprep".format(host_ip=host_ip, port=port)
|
||||
|
||||
# Split jsonl file into json files
|
||||
files = split_jsonl_into_txts(os.path.join(args.filedir, args.filename))
|
||||
|
||||
@@ -13,14 +13,13 @@ export TEI_EMBEDDING_ENDPOINT="http://${host_ip}:6006"
|
||||
export TEI_RERANKING_ENDPOINT="http://${host_ip}:8808"
|
||||
export REDIS_URL="redis://${host_ip}:6379"
|
||||
export INDEX_NAME="rag-redis"
|
||||
export RERANK_TYPE="tei"
|
||||
export MEGA_SERVICE_HOST_IP=${host_ip}
|
||||
export EMBEDDING_SERVICE_HOST_IP=${host_ip}
|
||||
export RETRIEVER_SERVICE_HOST_IP=${host_ip}
|
||||
export RERANK_SERVICE_HOST_IP=${host_ip}
|
||||
export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8889/v1/retrievaltool"
|
||||
export DATAPREP_SERVICE_ENDPOINT="http://${host_ip}:6007/v1/dataprep/ingest"
|
||||
export DATAPREP_GET_FILE_ENDPOINT="http://${host_ip}:6008/v1/dataprep/get"
|
||||
export DATAPREP_DELETE_FILE_ENDPOINT="http://${host_ip}:6009/v1/dataprep/delete"
|
||||
export DATAPREP_SERVICE_ENDPOINT="http://${host_ip}:6007/v1/dataprep"
|
||||
export DATAPREP_GET_FILE_ENDPOINT="http://${host_ip}:6008/v1/dataprep/get_file"
|
||||
export DATAPREP_DELETE_FILE_ENDPOINT="http://${host_ip}:6009/v1/dataprep/delete_file"
|
||||
|
||||
docker compose -f $WORKDIR/GenAIExamples/DocIndexRetriever/docker_compose/intel/cpu/xeon/compose.yaml up -d
|
||||
|
||||
@@ -15,35 +15,28 @@ function stop_agent_and_api_server() {
|
||||
echo "Stopping CRAG server"
|
||||
docker stop $(docker ps -q --filter ancestor=docker.io/aicrowd/kdd-cup-24-crag-mock-api:v0)
|
||||
echo "Stopping Agent services"
|
||||
docker stop $(docker ps -q --filter ancestor=opea/agent:latest)
|
||||
docker stop $(docker ps -q --filter ancestor=opea/agent-langchain:latest)
|
||||
}
|
||||
|
||||
function stop_retrieval_tool() {
|
||||
echo "Stopping Retrieval tool"
|
||||
local RETRIEVAL_TOOL_PATH=$WORKPATH/../DocIndexRetriever
|
||||
cd $RETRIEVAL_TOOL_PATH/docker_compose/intel/cpu/xeon/
|
||||
container_list=$(cat compose.yaml | grep container_name | cut -d':' -f2)
|
||||
for container_name in $container_list; do
|
||||
cid=$(docker ps -aq --filter "name=$container_name")
|
||||
echo "Stopping container $container_name"
|
||||
if [[ ! -z "$cid" ]]; then docker rm $cid -f && sleep 1s; fi
|
||||
done
|
||||
docker compose -f $WORKDIR/GenAIExamples/AgentQnA/retrieval_tool/docker/docker-compose-retrieval-tool.yaml down
|
||||
}
|
||||
|
||||
echo "=================== #1 Building docker images===================="
|
||||
bash step1_build_images.sh
|
||||
bash 1_build_images.sh
|
||||
echo "=================== #1 Building docker images completed===================="
|
||||
|
||||
echo "=================== #2 Start retrieval tool===================="
|
||||
bash step2_start_retrieval_tool.sh
|
||||
bash 2_start_retrieval_tool.sh
|
||||
echo "=================== #2 Retrieval tool started===================="
|
||||
|
||||
echo "=================== #3 Ingest data and validate retrieval===================="
|
||||
bash step3_ingest_data_and_validate_retrieval.sh
|
||||
bash 3_ingest_data_and_validate_retrieval.sh
|
||||
echo "=================== #3 Data ingestion and validation completed===================="
|
||||
|
||||
echo "=================== #4 Start agent and API server===================="
|
||||
bash step4_launch_and_validate_agent_openai.sh
|
||||
bash 4_launch_and_validate_agent_openai.sh
|
||||
echo "=================== #4 Agent test passed ===================="
|
||||
|
||||
echo "=================== #5 Stop agent and API server===================="
|
||||
|
||||
@@ -1,6 +0,0 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
DATAPATH=$WORKDIR/TAG-Bench/tag_queries.csv
|
||||
OUTFOLDER=$WORKDIR/TAG-Bench/query_by_db
|
||||
python3 split_data.py --path $DATAPATH --output $OUTFOLDER
|
||||
@@ -1,27 +0,0 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
import argparse
|
||||
import os
|
||||
|
||||
import pandas as pd
|
||||
|
||||
if __name__ == "__main__":
    # Command line: --path is the CSV to read, --output is the directory that
    # receives one CSV per distinct value of the "DB used" column.
    parser = argparse.ArgumentParser()
    parser.add_argument("--path", type=str, required=True)
    parser.add_argument("--output", type=str, required=True)
    args = parser.parse_args()

    # Create the output folder if it does not exist. exist_ok avoids the
    # check-then-create race of a separate os.path.exists() test.
    os.makedirs(args.output, exist_ok=True)

    # Load the data
    data = pd.read_csv(args.path)

    # Split the data by domain
    domains = data["DB used"].unique()
    for domain in domains:
        domain_data = data[data["DB used"] == domain]
        out = os.path.join(args.output, f"query_{domain}.csv")
        domain_data.to_csv(out, index=False)
|
||||
@@ -11,18 +11,19 @@ export ip_address=$(hostname -I | awk '{print $1}')
|
||||
|
||||
function get_genai_comps() {
|
||||
if [ ! -d "GenAIComps" ] ; then
|
||||
git clone --depth 1 --branch ${opea_branch:-"main"} https://github.com/opea-project/GenAIComps.git
|
||||
git clone https://github.com/opea-project/GenAIComps.git && cd GenAIComps && git checkout "${opea_branch:-"main"}" && cd ../
|
||||
fi
|
||||
}
|
||||
|
||||
|
||||
function build_docker_images_for_retrieval_tool(){
|
||||
cd $WORKDIR/GenAIExamples/DocIndexRetriever/docker_image_build/
|
||||
# git clone https://github.com/opea-project/GenAIComps.git && cd GenAIComps && git checkout "${opea_branch:-"main"}" && cd ../
|
||||
get_genai_comps
|
||||
echo "Build all the images with --no-cache..."
|
||||
service_list="doc-index-retriever dataprep embedding retriever reranking"
|
||||
service_list="doc-index-retriever dataprep-redis embedding-tei retriever-redis reranking-tei"
|
||||
docker compose -f build.yaml build ${service_list} --no-cache
|
||||
docker pull ghcr.io/huggingface/text-embeddings-inference:cpu-1.6
|
||||
docker pull ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
|
||||
|
||||
docker images && sleep 1s
|
||||
}
|
||||
@@ -34,26 +35,6 @@ function build_agent_docker_image() {
|
||||
docker compose -f build.yaml build --no-cache
|
||||
}
|
||||
|
||||
function build_vllm_docker_image() {
|
||||
echo "Building the vllm docker image"
|
||||
cd $WORKPATH
|
||||
echo $WORKPATH
|
||||
if [ ! -d "./vllm-fork" ]; then
|
||||
git clone https://github.com/HabanaAI/vllm-fork.git
|
||||
fi
|
||||
cd ./vllm-fork
|
||||
VLLM_VER=$(git describe --tags "$(git rev-list --tags --max-count=1)")
|
||||
git checkout ${VLLM_VER} &> /dev/null
|
||||
docker build --no-cache -f Dockerfile.hpu -t opea/vllm-gaudi:ci --shm-size=128g . --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy
|
||||
if [ $? -ne 0 ]; then
|
||||
echo "opea/vllm-gaudi:ci failed"
|
||||
exit 1
|
||||
else
|
||||
echo "opea/vllm-gaudi:ci successful"
|
||||
fi
|
||||
}
|
||||
|
||||
|
||||
function main() {
|
||||
echo "==================== Build docker images for retrieval tool ===================="
|
||||
build_docker_images_for_retrieval_tool
|
||||
@@ -62,12 +43,6 @@ function main() {
|
||||
echo "==================== Build agent docker image ===================="
|
||||
build_agent_docker_image
|
||||
echo "==================== Build agent docker image completed ===================="
|
||||
|
||||
echo "==================== Build vllm docker image ===================="
|
||||
build_vllm_docker_image
|
||||
echo "==================== Build vllm docker image completed ===================="
|
||||
|
||||
docker image ls | grep vllm
|
||||
}
|
||||
|
||||
main
|
||||
|
||||
@@ -1,64 +0,0 @@
|
||||
#!/bin/bash
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
set -e
|
||||
export WORKPATH=$(dirname "$PWD")
|
||||
export WORKDIR=${WORKPATH}/../../
|
||||
echo "WORKDIR=${WORKDIR}"
|
||||
export ip_address=$(hostname -I | awk '{print $1}')
|
||||
|
||||
|
||||
function get_genai_comps() {
    # Reuse a GenAIComps checkout already present in the current directory;
    # otherwise shallow-clone the branch named by $opea_branch (default: main).
    [[ -d "GenAIComps" ]] || git clone --depth 1 --branch ${opea_branch:-"main"} https://github.com/opea-project/GenAIComps.git
}
|
||||
|
||||
|
||||
function build_docker_images_for_retrieval_tool(){
    # Builds the DocIndexRetriever service images from its build.yaml and pulls
    # the TEI CPU image.
    # Quoted so a checkout path containing spaces does not split into words.
    cd "$WORKPATH/../DocIndexRetriever/docker_image_build/"
    get_genai_comps
    echo "Build all the images with --no-cache..."
    service_list="doc-index-retriever dataprep embedding retriever reranking"
    # Deliberately unquoted: each word is passed as a separate service name.
    docker compose -f build.yaml build ${service_list} --no-cache
    docker pull ghcr.io/huggingface/text-embeddings-inference:cpu-1.5

    docker images && sleep 3s
}
|
||||
|
||||
function build_agent_docker_image() {
    # Builds every service listed in this example's docker_image_build/build.yaml.
    # Quoted so a checkout path containing spaces does not split into words.
    cd "$WORKPATH/docker_image_build/"
    get_genai_comps
    echo "Build agent image with --no-cache..."
    docker compose -f build.yaml build --no-cache

    docker images && sleep 3s
}
|
||||
|
||||
#function build_vllm_docker_image() {
|
||||
# echo "Building the vllm docker image"
|
||||
# cd $WORKPATH/
|
||||
# docker build --no-cache -t opea/llm-vllm-rocm:ci -f Dockerfile-vllm-rocm .
|
||||
#
|
||||
# docker images && sleep 3s
|
||||
#}
|
||||
|
||||
|
||||
function main() {
    # Entry point: build the retrieval-tool images, then the agent image, then
    # list any vllm images that are present locally.
    echo "==================== Build docker images for retrieval tool ===================="
    build_docker_images_for_retrieval_tool
    echo "==================== Build docker images for retrieval tool completed ===================="

    echo "==================== Build agent docker image ===================="
    build_agent_docker_image
    echo "==================== Build agent docker image completed ===================="

#    echo "==================== Build vllm docker image ===================="
#    build_vllm_docker_image
#    echo "==================== Build vllm docker image completed ===================="

    # Informational listing only. grep exits 1 when nothing matches, which
    # under `set -e` would fail the whole script after successful builds; the
    # vllm build above is commented out, so no match is a normal outcome.
    docker image ls | grep vllm || echo "No vllm image found locally"
}
|
||||
|
||||
main
|
||||
@@ -7,9 +7,8 @@ WORKPATH=$(dirname "$PWD")
|
||||
export WORKDIR=$WORKPATH/../../
|
||||
echo "WORKDIR=${WORKDIR}"
|
||||
export ip_address=$(hostname -I | awk '{print $1}')
|
||||
export host_ip=${ip_address}
|
||||
|
||||
export HF_CACHE_DIR=${model_cache:-"$WORKDIR/hf_cache"}
|
||||
export HF_CACHE_DIR=$WORKDIR/hf_cache
|
||||
if [ ! -d "$HF_CACHE_DIR" ]; then
|
||||
echo "Creating HF_CACHE directory"
|
||||
mkdir -p "$HF_CACHE_DIR"
|
||||
|
||||
@@ -1,49 +0,0 @@
|
||||
#!/bin/bash
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
set -e
|
||||
WORKPATH=$(dirname "$PWD")
|
||||
export WORKDIR=$WORKPATH/../
|
||||
echo "WORKDIR=${WORKDIR}"
|
||||
export ip_address=$(hostname -I | awk '{print $1}')
|
||||
export host_ip=${ip_address}
|
||||
|
||||
export HF_CACHE_DIR=$WORKPATH/hf_cache
|
||||
if [ ! -d "$HF_CACHE_DIR" ]; then
|
||||
echo "Creating HF_CACHE directory"
|
||||
mkdir -p "$HF_CACHE_DIR"
|
||||
fi
|
||||
|
||||
function start_retrieval_tool() {
    # Exports the settings read by the DocIndexRetriever Xeon compose file and
    # brings that stack up in detached mode.
    echo "Starting Retrieval tool"
    # Quoted so a checkout path containing spaces does not split into words.
    cd "$WORKPATH/../DocIndexRetriever/docker_compose/intel/cpu/xeon"
    host_ip=$(hostname -I | awk '{print $1}')
    export HF_CACHE_DIR=${HF_CACHE_DIR}
    export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
    export no_proxy=${no_proxy}
    export http_proxy=${http_proxy}
    export https_proxy=${https_proxy}
    export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
    export RERANK_MODEL_ID="BAAI/bge-reranker-base"
    export TEI_EMBEDDING_ENDPOINT="http://${host_ip}:6006"
    export TEI_RERANKING_ENDPOINT="http://${host_ip}:8808"
    export REDIS_URL="redis://${host_ip}:6379"
    export INDEX_NAME="rag-redis"
    export RERANK_TYPE="tei"
    export MEGA_SERVICE_HOST_IP=${host_ip}
    export EMBEDDING_SERVICE_HOST_IP=${host_ip}
    export RETRIEVER_SERVICE_HOST_IP=${host_ip}
    export RERANK_SERVICE_HOST_IP=${host_ip}
    export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8889/v1/retrievaltool"
    export DATAPREP_SERVICE_ENDPOINT="http://${host_ip}:6007/v1/dataprep/ingest"
    export DATAPREP_GET_FILE_ENDPOINT="http://${host_ip}:6008/v1/dataprep/get"
    export DATAPREP_DELETE_FILE_ENDPOINT="http://${host_ip}:6009/v1/dataprep/delete"

    docker compose -f compose.yaml up -d
}
|
||||
|
||||
echo "==================== Start retrieval tool ===================="
|
||||
start_retrieval_tool
|
||||
sleep 20 # needed for downloading the models
|
||||
echo "==================== Retrieval tool started ===================="
|
||||
@@ -1,68 +0,0 @@
|
||||
#!/bin/bash
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
set -e
|
||||
|
||||
WORKPATH=$(dirname "$PWD")
|
||||
export WORKDIR=$WORKPATH/../../
|
||||
echo "WORKDIR=${WORKDIR}"
|
||||
export ip_address=$(hostname -I | awk '{print $1}')
|
||||
export host_ip=$ip_address
|
||||
echo "ip_address=${ip_address}"
|
||||
|
||||
|
||||
function validate() {
    # Usage: validate <content> <expected-pattern> <service-name>
    # Prints a one-line verdict tagged with the service name, then a status
    # digit on its own line: 0 when grep finds the pattern in the content,
    # 1 otherwise. Callers read the last character of the output.
    local body="$1" needle="$2" svc="$3"
    local verdict status

    if echo "$body" | grep -q "$needle"; then
        verdict="Content is as expected"
        status=0
    else
        verdict="Content does not match the expected result"
        status=1
    fi

    echo "[ $svc ] $verdict: $body"
    echo "$status"
}
|
||||
|
||||
function ingest_data_and_validate() {
    # Runs run_ingest_data.sh from the retrieval_tool directory and checks its
    # output for the success marker. On a miss, dumps the dataprep container
    # logs and returns 1.
    echo "Ingesting data"
    cd $WORKPATH/retrieval_tool/
    echo $PWD

    local ingest_output=$(bash run_ingest_data.sh)
    local report=$(validate "$ingest_output" "Data preparation succeeded" "dataprep-redis-server")
    echo "$report"

    # validate prints a message line followed by 0 or 1; the last character
    # of its output is the verdict.
    local status="${report:0-1}"
    echo "return value is $status"

    case "$status" in
        1)
            docker logs dataprep-redis-server
            return 1
            ;;
    esac
}
|
||||
|
||||
function validate_retrieval_tool() {
    # Posts a sample question to the retrieval tool endpoint and exits the
    # script with status 1 (after dumping the backend container logs) when
    # the response does not contain "Thriller".
    echo "----------------Test retrieval tool ----------------"
    local CONTENT=$(http_proxy="" curl http://${ip_address}:8889/v1/retrievaltool -X POST -H "Content-Type: application/json" -d '{
    "text": "Who sang Thriller"
    }')
    local EXIT_CODE=$(validate "$CONTENT" "Thriller" "retrieval-tool")
    # validate prints a message line followed by 0 or 1. Keep only the final
    # character; comparing the whole output to "1" can never match, so a
    # failed check would go unnoticed.
    EXIT_CODE="${EXIT_CODE:0-1}"

    if [ "$EXIT_CODE" == "1" ]; then
        docker logs retrievaltool-xeon-backend-server
        exit 1
    fi
}
|
||||
|
||||
function main(){
    # Entry point: ingest the data, then query the retrieval tool. Each stage
    # is bracketed by banner lines in the log.
    printf '%s\n' "==================== Ingest data ===================="
    ingest_data_and_validate
    printf '%s\n' "==================== Data ingestion completed ===================="

    printf '%s\n' "==================== Validate retrieval tool ===================="
    validate_retrieval_tool
    printf '%s\n' "==================== Retrieval tool validated ===================="
}
|
||||
|
||||
main
|
||||
@@ -1,214 +0,0 @@
|
||||
#!/bin/bash
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
set -e
|
||||
|
||||
WORKPATH=$(dirname "$PWD")
|
||||
export WORKDIR=$WORKPATH/../../
|
||||
echo "WORKDIR=${WORKDIR}"
|
||||
export ip_address=$(hostname -I | awk '{print $1}')
|
||||
export TOOLSET_PATH=$WORKPATH/tools/
|
||||
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
|
||||
HF_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
|
||||
model="meta-llama/Llama-3.3-70B-Instruct" #"meta-llama/Meta-Llama-3.1-70B-Instruct"
|
||||
|
||||
export HF_CACHE_DIR=${model_cache:-"/data2/huggingface"}
|
||||
if [ ! -d "$HF_CACHE_DIR" ]; then
|
||||
HF_CACHE_DIR=$WORKDIR/hf_cache
|
||||
mkdir -p "$HF_CACHE_DIR"
|
||||
fi
|
||||
echo "HF_CACHE_DIR=$HF_CACHE_DIR"
|
||||
ls $HF_CACHE_DIR
|
||||
|
||||
vllm_port=8086
|
||||
vllm_volume=${HF_CACHE_DIR}
|
||||
|
||||
function start_tgi(){
    # Brings up the retrieval stack, the agent stack, the tgi-gaudi server and
    # the telemetry services as one compose project.
    echo "Starting tgi-gaudi server"
    cd "$WORKDIR/GenAIExamples/AgentQnA/docker_compose/intel/hpu/gaudi"
    source set_env.sh
    # Every compose file needs its own -f. Without it, tgi_gaudi.yaml is taken
    # as the compose subcommand and the invocation fails.
    docker compose \
        -f "$WORKDIR/GenAIExamples/DocIndexRetriever/docker_compose/intel/cpu/xeon/compose.yaml" \
        -f compose.yaml \
        -f tgi_gaudi.yaml \
        -f compose.telemetry.yaml \
        up -d
}
|
||||
|
||||
function start_all_services() {
    # Starts the retrieval, agent and telemetry compose stacks, then waits for
    # vllm-gaudi-server to log "Uvicorn running on". Exits the script with
    # status 1 if the container is missing or never becomes ready.

    # Report only whether a token is present; the value is a secret and must
    # not be written to the log.
    if [[ -n "${HF_TOKEN}" ]]; then
        echo "token is set"
    else
        echo "token is NOT set"
    fi

    echo "start vllm gaudi service"
    echo "**************model is $model**************"
    cd "$WORKDIR/GenAIExamples/AgentQnA/docker_compose/intel/hpu/gaudi"
    source set_env.sh
    docker compose -f "$WORKDIR/GenAIExamples/DocIndexRetriever/docker_compose/intel/cpu/xeon/compose.yaml" -f compose.yaml -f compose.telemetry.yaml up -d
    sleep 5s
    echo "Waiting vllm gaudi ready"
    n=0
    LOG_PATH=$PWD
    local ready=false
    # Poll at most 100 times, 5s apart.
    until [[ "$n" -ge 100 ]] || [[ $ready == true ]]; do
        # `|| true`: a failing `docker logs` (e.g. missing container) must
        # reach the explicit check below instead of tripping `set -e` here.
        docker logs vllm-gaudi-server &> "${LOG_PATH}/vllm-gaudi-service.log" || true
        cat "${LOG_PATH}/vllm-gaudi-service.log"
        n=$((n+1))
        if grep -q "Uvicorn running on" "${LOG_PATH}/vllm-gaudi-service.log"; then
            ready=true
            break
        fi
        if grep -q "No such container" "${LOG_PATH}/vllm-gaudi-service.log"; then
            echo "container vllm-gaudi-server not found"
            exit 1
        fi
        sleep 5s
    done
    # Do not report success after a timeout.
    if [[ $ready != true ]]; then
        echo "vllm-gaudi-server not ready after ${n} checks; see ${LOG_PATH}/vllm-gaudi-service.log"
        exit 1
    fi
    sleep 5s
    echo "Service started successfully"
}
|
||||
|
||||
function download_chinook_data(){
    # Clones the chinook-database repository into $WORKDIR and copies its
    # SQLite file into the AgentQnA tests directory.
    echo "Downloading chinook data..."
    cd "$WORKDIR"
    # Only one file from the current tree is copied, so skip the history.
    git clone --depth 1 https://github.com/lerocha/chinook-database.git
    cp chinook-database/ChinookDatabase/DataSources/Chinook_Sqlite.sqlite "$WORKDIR/GenAIExamples/AgentQnA/tests/"
}
|
||||
|
||||
|
||||
function validate() {
    # Usage: validate <content> <expected-pattern> <service-name>
    # Prints a one-line verdict tagged with the service name, then a status
    # digit on its own line: 0 when grep finds the pattern in the content,
    # 1 otherwise. Callers read the last character of the output.
    local body="$1" needle="$2" svc="$3"
    local verdict status

    if echo "$body" | grep -q "$needle"; then
        verdict="Content is as expected"
        status=0
    else
        verdict="Content does not match the expected result"
        status=1
    fi

    echo "[ $svc ] $verdict: $body"
    echo "$status"
}
|
||||
|
||||
function validate_agent_service() {
    # Queries the three agent endpoints in turn through tests/test.py. On the
    # first response that lacks its expected keyword, dumps that agent's
    # container logs and exits the script with status 1.
    # Logged values are quoted so agent output is printed verbatim rather than
    # word-split and glob-expanded by the shell.

    # # test worker rag agent
    echo "======================Testing worker rag agent======================"
    export agent_port="9095"
    export agent_ip="127.0.0.1"
    prompt="Tell me about Michael Jackson song Thriller"
    local CONTENT=$(python3 "$WORKDIR/GenAIExamples/AgentQnA/tests/test.py" --prompt "$prompt" --agent_role "worker" --ip_addr $agent_ip --ext_port $agent_port)
    # echo $CONTENT
    local EXIT_CODE=$(validate "$CONTENT" "Thriller" "rag-agent-endpoint")
    echo "$EXIT_CODE"
    # validate prints a message line then 0 or 1; keep only the status digit.
    EXIT_CODE="${EXIT_CODE:0-1}"
    if [ "$EXIT_CODE" == "1" ]; then
        docker logs rag-agent-endpoint
        exit 1
    fi

    # # test worker sql agent
    echo "======================Testing worker sql agent======================"
    export agent_port="9096"
    prompt="How many employees are there in the company?"
    CONTENT=$(python3 "$WORKDIR/GenAIExamples/AgentQnA/tests/test.py" --prompt "$prompt" --agent_role "worker" --ip_addr $agent_ip --ext_port $agent_port)
    EXIT_CODE=$(validate "$CONTENT" "8" "sql-agent-endpoint")
    echo "$CONTENT"
    # echo $EXIT_CODE
    EXIT_CODE="${EXIT_CODE:0-1}"
    if [ "$EXIT_CODE" == "1" ]; then
        docker logs sql-agent-endpoint
        exit 1
    fi

    # test supervisor react agent
    echo "======================Testing supervisor react agent======================"
    export agent_port="9090"
    CONTENT=$(python3 "$WORKDIR/GenAIExamples/AgentQnA/tests/test.py" --agent_role "supervisor" --ip_addr $agent_ip --ext_port $agent_port --stream)
    EXIT_CODE=$(validate "$CONTENT" "Iron" "react-agent-endpoint")
    # echo $CONTENT
    echo "$EXIT_CODE"
    EXIT_CODE="${EXIT_CODE:0-1}"
    if [ "$EXIT_CODE" == "1" ]; then
        docker logs react-agent-endpoint
        exit 1
    fi

}
|
||||
|
||||
function remove_chinook_data(){
    # Deletes the chinook-database checkout under $WORKDIR when it is present.
    echo "Removing chinook data..."
    cd $WORKDIR
    [[ -d "chinook-database" ]] && rm -rf chinook-database
    echo "Chinook data removed!"
}
|
||||
|
||||
export host_ip=$ip_address
|
||||
echo "ip_address=${ip_address}"
|
||||
|
||||
|
||||
function validate() {
    # Usage: validate <content> <expected-pattern> <service-name>
    # Prints a one-line verdict tagged with the service name, then a status
    # digit on its own line: 0 when grep finds the pattern in the content,
    # 1 otherwise. Callers read the last character of the output.
    local body="$1" needle="$2" svc="$3"
    local verdict status

    if echo "$body" | grep -q "$needle"; then
        verdict="Content is as expected"
        status=0
    else
        verdict="Content does not match the expected result"
        status=1
    fi

    echo "[ $svc ] $verdict: $body"
    echo "$status"
}
|
||||
|
||||
function ingest_data_and_validate() {
    # Runs run_ingest_data.sh from the AgentQnA retrieval_tool directory and
    # checks its output for the success marker. On a miss, dumps the dataprep
    # container logs and returns 1.
    echo "Ingesting data"
    cd $WORKDIR/GenAIExamples/AgentQnA/retrieval_tool/
    echo $PWD

    local ingest_output=$(bash run_ingest_data.sh)
    local report=$(validate "$ingest_output" "Data preparation succeeded" "dataprep-redis-server")
    echo "$report"

    # validate prints a message line followed by 0 or 1; the last character
    # of its output is the verdict.
    local status="${report:0-1}"
    echo "return value is $status"

    case "$status" in
        1)
            docker logs dataprep-redis-server
            return 1
            ;;
    esac
}
|
||||
|
||||
function validate_retrieval_tool() {
    # Posts a sample question to the retrieval tool endpoint and exits the
    # script with status 1 (after dumping the backend container logs) when
    # the response does not contain "Thriller".
    echo "----------------Test retrieval tool ----------------"
    local CONTENT=$(http_proxy="" curl http://${ip_address}:8889/v1/retrievaltool -X POST -H "Content-Type: application/json" -d '{
    "text": "Who sang Thriller"
    }')
    local EXIT_CODE=$(validate "$CONTENT" "Thriller" "retrieval-tool")
    # validate prints a message line followed by 0 or 1. Keep only the final
    # character; comparing the whole output to "1" can never match, so a
    # failed check would go unnoticed.
    EXIT_CODE="${EXIT_CODE:0-1}"

    if [ "$EXIT_CODE" == "1" ]; then
        docker logs retrievaltool-xeon-backend-server
        exit 1
    fi
}
|
||||
|
||||
function main() {
    # Entry point: fetch the sample database, start every service, ingest the
    # data, then validate the retrieval tool and the agents. Each stage is
    # bracketed by banner lines in the log.
    printf '%s\n' "==================== Prepare data ===================="
    download_chinook_data
    printf '%s\n' "==================== Data prepare done ===================="

    printf '%s\n' "==================== Start all services ===================="
    start_all_services
    printf '%s\n' "==================== all services started ===================="

    printf '%s\n' "==================== Ingest data ===================="
    ingest_data_and_validate
    printf '%s\n' "==================== Data ingestion completed ===================="

    printf '%s\n' "==================== Validate retrieval tool ===================="
    validate_retrieval_tool
    printf '%s\n' "==================== Retrieval tool validated ===================="

    printf '%s\n' "==================== Validate agent service ===================="
    validate_agent_service
    printf '%s\n' "==================== Agent service validated ===================="
}
|
||||
|
||||
|
||||
remove_chinook_data
|
||||
|
||||
main
|
||||
|
||||
remove_chinook_data
|
||||
@@ -11,22 +11,13 @@ echo "WORKDIR=${WORKDIR}"
|
||||
export ip_address=$(hostname -I | awk '{print $1}')
|
||||
export TOOLSET_PATH=$WORKDIR/GenAIExamples/AgentQnA/tools/
|
||||
|
||||
|
||||
function download_chinook_data(){
|
||||
echo "Downloading chinook data..."
|
||||
cd $WORKDIR
|
||||
git clone https://github.com/lerocha/chinook-database.git
|
||||
cp chinook-database/ChinookDatabase/DataSources/Chinook_Sqlite.sqlite $WORKDIR/GenAIExamples/AgentQnA/tests/
|
||||
}
|
||||
|
||||
function start_agent_and_api_server() {
|
||||
echo "Starting CRAG server"
|
||||
docker run -d --runtime=runc --name=kdd-cup-24-crag-service -p=8080:8000 docker.io/aicrowd/kdd-cup-24-crag-mock-api:v0
|
||||
|
||||
echo "Starting Agent services"
|
||||
cd $WORKDIR/GenAIExamples/AgentQnA/docker_compose/intel/cpu/xeon/
|
||||
cd $WORKDIR/GenAIExamples/AgentQnA/docker_compose/intel/cpu/xeon
|
||||
bash launch_agent_service_openai.sh
|
||||
sleep 2m
|
||||
}
|
||||
|
||||
function validate() {
|
||||
@@ -44,64 +35,19 @@ function validate() {
|
||||
}
|
||||
|
||||
function validate_agent_service() {
|
||||
# # test worker rag agent
|
||||
echo "======================Testing worker rag agent======================"
|
||||
export agent_port="9095"
|
||||
prompt="Tell me about Michael Jackson song Thriller"
|
||||
local CONTENT=$(python3 $WORKDIR/GenAIExamples/AgentQnA/tests/test.py --prompt "$prompt" --agent_role "worker" --ext_port $agent_port)
|
||||
# echo $CONTENT
|
||||
local EXIT_CODE=$(validate "$CONTENT" "Thriller" "rag-agent-endpoint")
|
||||
echo $EXIT_CODE
|
||||
local EXIT_CODE="${EXIT_CODE:0-1}"
|
||||
echo "----------------Test agent ----------------"
|
||||
local CONTENT=$(http_proxy="" curl http://${ip_address}:9090/v1/chat/completions -X POST -H "Content-Type: application/json" -d '{
|
||||
"query": "Tell me about Michael Jackson song thriller"
|
||||
}')
|
||||
local EXIT_CODE=$(validate "$CONTENT" "Thriller" "react-agent-endpoint")
|
||||
docker logs react-agent-endpoint
|
||||
if [ "$EXIT_CODE" == "1" ]; then
|
||||
docker logs rag-agent-endpoint
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# # test worker sql agent
|
||||
echo "======================Testing worker sql agent======================"
|
||||
export agent_port="9096"
|
||||
prompt="How many employees are there in the company?"
|
||||
local CONTENT=$(python3 $WORKDIR/GenAIExamples/AgentQnA/tests/test.py --prompt "$prompt" --agent_role "worker" --ext_port $agent_port)
|
||||
local EXIT_CODE=$(validate "$CONTENT" "8" "sql-agent-endpoint")
|
||||
echo $CONTENT
|
||||
# echo $EXIT_CODE
|
||||
local EXIT_CODE="${EXIT_CODE:0-1}"
|
||||
if [ "$EXIT_CODE" == "1" ]; then
|
||||
docker logs sql-agent-endpoint
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# test supervisor react agent
|
||||
echo "======================Testing supervisor react agent======================"
|
||||
export agent_port="9090"
|
||||
local CONTENT=$(python3 $WORKDIR/GenAIExamples/AgentQnA/tests/test.py --agent_role "supervisor" --ext_port $agent_port --stream)
|
||||
local EXIT_CODE=$(validate "$CONTENT" "Iron" "react-agent-endpoint")
|
||||
# echo $CONTENT
|
||||
echo $EXIT_CODE
|
||||
local EXIT_CODE="${EXIT_CODE:0-1}"
|
||||
if [ "$EXIT_CODE" == "1" ]; then
|
||||
docker logs react-agent-endpoint
|
||||
exit 1
|
||||
fi
|
||||
|
||||
}
|
||||
|
||||
function remove_chinook_data(){
|
||||
echo "Removing chinook data..."
|
||||
cd $WORKDIR
|
||||
if [ -d "chinook-database" ]; then
|
||||
rm -rf chinook-database
|
||||
fi
|
||||
echo "Chinook data removed!"
|
||||
}
|
||||
|
||||
|
||||
function main() {
|
||||
echo "==================== Prepare data ===================="
|
||||
download_chinook_data
|
||||
echo "==================== Data prepare done ===================="
|
||||
|
||||
echo "==================== Start agent ===================="
|
||||
start_agent_and_api_server
|
||||
echo "==================== Agent started ===================="
|
||||
@@ -111,9 +57,4 @@ function main() {
|
||||
echo "==================== Agent service validated ===================="
|
||||
}
|
||||
|
||||
|
||||
remove_chinook_data
|
||||
|
||||
main
|
||||
|
||||
remove_chinook_data
|
||||
|
||||
@@ -1,120 +0,0 @@
|
||||
#!/bin/bash
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
set -e
|
||||
|
||||
WORKPATH=$(dirname "$PWD")
|
||||
export LOG_PATH=${WORKPATH}
|
||||
export WORKDIR=$WORKPATH/../../
|
||||
echo "WORKDIR=${WORKDIR}"
|
||||
export ip_address=$(hostname -I | awk '{print $1}')
|
||||
export host_ip=${ip_address}
|
||||
export TOOLSET_PATH=$WORKPATH/tools/
|
||||
|
||||
export HF_CACHE_DIR=$WORKPATH/data2/huggingface
|
||||
if [ ! -d "$HF_CACHE_DIR" ]; then
|
||||
HF_CACHE_DIR=$WORKDIR/hf_cache
|
||||
mkdir -p "$HF_CACHE_DIR"
|
||||
fi
|
||||
|
||||
function download_chinook_data(){
|
||||
echo "Downloading chinook data..."
|
||||
cd $WORKDIR
|
||||
git clone https://github.com/lerocha/chinook-database.git
|
||||
cp chinook-database/ChinookDatabase/DataSources/Chinook_Sqlite.sqlite ${WORKPATH}/tests/
|
||||
}
|
||||
|
||||
function start_agent_and_api_server() {
|
||||
echo "Starting Agent services"
|
||||
cd $WORKDIR/GenAIExamples/AgentQnA/docker_compose/amd/gpu/rocm
|
||||
bash launch_agent_service_vllm_rocm.sh
|
||||
}
|
||||
|
||||
function validate() {
|
||||
local CONTENT="$1"
|
||||
local EXPECTED_RESULT="$2"
|
||||
local SERVICE_NAME="$3"
|
||||
|
||||
if echo "$CONTENT" | grep -q "$EXPECTED_RESULT"; then
|
||||
echo "[ $SERVICE_NAME ] Content is as expected: $CONTENT"
|
||||
echo 0
|
||||
else
|
||||
echo "[ $SERVICE_NAME ] Content does not match the expected result: $CONTENT"
|
||||
echo 1
|
||||
fi
|
||||
}
|
||||
|
||||
function validate_agent_service() {
|
||||
# # test worker rag agent
|
||||
echo "======================Testing worker rag agent======================"
|
||||
export agent_port=$(cat ${WORKPATH}/docker_compose/amd/gpu/WORKER_RAG_AGENT_PORT_tmp)
|
||||
prompt="Tell me about Michael Jackson song Thriller"
|
||||
local CONTENT=$(python3 $WORKDIR/GenAIExamples/AgentQnA/tests/test.py --prompt "$prompt" --agent_role "worker" --ext_port $agent_port)
|
||||
# echo $CONTENT
|
||||
local EXIT_CODE=$(validate "$CONTENT" "Thriller" "rag-agent-endpoint")
|
||||
echo $EXIT_CODE
|
||||
local EXIT_CODE="${EXIT_CODE:0-1}"
|
||||
if [ "$EXIT_CODE" == "1" ]; then
|
||||
docker logs rag-agent-endpoint
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# test worker sql agent
|
||||
echo "======================Testing worker sql agent======================"
|
||||
export agent_port=$(cat ${WORKPATH}/docker_compose/amd/gpu/WORKER_SQL_AGENT_PORT_tmp)
|
||||
prompt="How many employees are there in the company?"
|
||||
local CONTENT=$(python3 $WORKDIR/GenAIExamples/AgentQnA/tests/test.py --prompt "$prompt" --agent_role "worker" --ext_port $agent_port)
|
||||
local EXIT_CODE=$(validate "$CONTENT" "8" "sql-agent-endpoint")
|
||||
echo $CONTENT
|
||||
# echo $EXIT_CODE
|
||||
local EXIT_CODE="${EXIT_CODE:0-1}"
|
||||
if [ "$EXIT_CODE" == "1" ]; then
|
||||
docker logs sql-agent-endpoint
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# test supervisor react agent
|
||||
echo "======================Testing supervisor react agent======================"
|
||||
export agent_port=$(cat ${WORKPATH}/docker_compose/amd/gpu/SUPERVISOR_REACT_AGENT_PORT_tmp)
|
||||
local CONTENT=$(python3 $WORKDIR/GenAIExamples/AgentQnA/tests/test.py --agent_role "supervisor" --ext_port $agent_port --stream)
|
||||
local EXIT_CODE=$(validate "$CONTENT" "Iron" "react-agent-endpoint")
|
||||
# echo $CONTENT
|
||||
echo $EXIT_CODE
|
||||
local EXIT_CODE="${EXIT_CODE:0-1}"
|
||||
if [ "$EXIT_CODE" == "1" ]; then
|
||||
docker logs react-agent-endpoint
|
||||
exit 1
|
||||
fi
|
||||
|
||||
}
|
||||
|
||||
function remove_chinook_data(){
|
||||
echo "Removing chinook data..."
|
||||
cd $WORKDIR
|
||||
if [ -d "chinook-database" ]; then
|
||||
rm -rf chinook-database
|
||||
fi
|
||||
echo "Chinook data removed!"
|
||||
}
|
||||
|
||||
function main() {
|
||||
echo "==================== Prepare data ===================="
|
||||
download_chinook_data
|
||||
echo "==================== Data prepare done ===================="
|
||||
|
||||
echo "==================== Start agent ===================="
|
||||
start_agent_and_api_server
|
||||
echo "==================== Agent started ===================="
|
||||
|
||||
echo "==================== Validate agent service ===================="
|
||||
validate_agent_service
|
||||
echo "==================== Agent service validated ===================="
|
||||
}
|
||||
|
||||
|
||||
remove_chinook_data
|
||||
|
||||
main
|
||||
|
||||
remove_chinook_data
|
||||
91
AgentQnA/tests/step4_launch_and_validate_agent_tgi.sh
Normal file
91
AgentQnA/tests/step4_launch_and_validate_agent_tgi.sh
Normal file
@@ -0,0 +1,91 @@
|
||||
#!/bin/bash
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
set -e
|
||||
|
||||
WORKPATH=$(dirname "$PWD")
|
||||
export WORKDIR=$WORKPATH/../../
|
||||
echo "WORKDIR=${WORKDIR}"
|
||||
export ip_address=$(hostname -I | awk '{print $1}')
|
||||
export TOOLSET_PATH=$WORKDIR/GenAIExamples/AgentQnA/tools/
|
||||
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
|
||||
|
||||
export HF_CACHE_DIR=$WORKDIR/hf_cache
|
||||
if [ ! -d "$HF_CACHE_DIR" ]; then
|
||||
mkdir -p "$HF_CACHE_DIR"
|
||||
fi
|
||||
ls $HF_CACHE_DIR
|
||||
|
||||
function start_tgi(){
|
||||
echo "Starting tgi-gaudi server"
|
||||
cd $WORKDIR/GenAIExamples/AgentQnA/docker_compose/intel/hpu/gaudi
|
||||
bash launch_tgi_gaudi.sh
|
||||
|
||||
}
|
||||
|
||||
function start_agent_and_api_server() {
|
||||
echo "Starting CRAG server"
|
||||
docker run -d --runtime=runc --name=kdd-cup-24-crag-service -p=8080:8000 docker.io/aicrowd/kdd-cup-24-crag-mock-api:v0
|
||||
|
||||
echo "Starting Agent services"
|
||||
cd $WORKDIR/GenAIExamples/AgentQnA/docker_compose/intel/hpu/gaudi
|
||||
bash launch_agent_service_tgi_gaudi.sh
|
||||
sleep 10
|
||||
}
|
||||
|
||||
function validate() {
|
||||
local CONTENT="$1"
|
||||
local EXPECTED_RESULT="$2"
|
||||
local SERVICE_NAME="$3"
|
||||
|
||||
if echo "$CONTENT" | grep -q "$EXPECTED_RESULT"; then
|
||||
echo "[ $SERVICE_NAME ] Content is as expected: $CONTENT"
|
||||
echo 0
|
||||
else
|
||||
echo "[ $SERVICE_NAME ] Content does not match the expected result: $CONTENT"
|
||||
echo 1
|
||||
fi
|
||||
}
|
||||
|
||||
function validate_agent_service() {
|
||||
echo "----------------Test agent ----------------"
|
||||
# local CONTENT=$(http_proxy="" curl http://${ip_address}:9095/v1/chat/completions -X POST -H "Content-Type: application/json" -d '{
|
||||
# "query": "Tell me about Michael Jackson song thriller"
|
||||
# }')
|
||||
export agent_port="9095"
|
||||
local CONTENT=$(python3 $WORKDIR/GenAIExamples/AgentQnA/tests/test.py)
|
||||
local EXIT_CODE=$(validate "$CONTENT" "Thriller" "rag-agent-endpoint")
|
||||
docker logs rag-agent-endpoint
|
||||
if [ "$EXIT_CODE" == "1" ]; then
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# local CONTENT=$(http_proxy="" curl http://${ip_address}:9090/v1/chat/completions -X POST -H "Content-Type: application/json" -d '{
|
||||
# "query": "Tell me about Michael Jackson song thriller"
|
||||
# }')
|
||||
export agent_port="9090"
|
||||
local CONTENT=$(python3 $WORKDIR/GenAIExamples/AgentQnA/tests/test.py)
|
||||
local EXIT_CODE=$(validate "$CONTENT" "Thriller" "react-agent-endpoint")
|
||||
docker logs react-agent-endpoint
|
||||
if [ "$EXIT_CODE" == "1" ]; then
|
||||
exit 1
|
||||
fi
|
||||
|
||||
}
|
||||
|
||||
function main() {
|
||||
echo "==================== Start TGI ===================="
|
||||
start_tgi
|
||||
echo "==================== TGI started ===================="
|
||||
|
||||
echo "==================== Start agent ===================="
|
||||
start_agent_and_api_server
|
||||
echo "==================== Agent started ===================="
|
||||
|
||||
echo "==================== Validate agent service ===================="
|
||||
validate_agent_service
|
||||
echo "==================== Agent service validated ===================="
|
||||
}
|
||||
|
||||
main
|
||||
@@ -2,30 +2,26 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
set -e
|
||||
set -ex
|
||||
|
||||
WORKPATH=$(dirname "$PWD")
|
||||
export LOG_PATH=${WORKPATH}
|
||||
export WORKDIR=$WORKPATH/../../
|
||||
echo "WORKDIR=${WORKDIR}"
|
||||
export ip_address=$(hostname -I | awk '{print $1}')
|
||||
export host_ip=${ip_address}
|
||||
export TOOLSET_PATH=$WORKPATH/tools/
|
||||
export TOOLSET_PATH=$WORKDIR/GenAIExamples/AgentQnA/tools/
|
||||
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
|
||||
|
||||
export HF_CACHE_DIR=$WORKPATH/data2/huggingface
|
||||
export HF_CACHE_DIR=$WORKDIR/hf_cache
|
||||
if [ ! -d "$HF_CACHE_DIR" ]; then
|
||||
HF_CACHE_DIR=$WORKDIR/hf_cache
|
||||
mkdir -p "$HF_CACHE_DIR"
|
||||
fi
|
||||
ls $HF_CACHE_DIR
|
||||
|
||||
function download_chinook_data(){
|
||||
echo "Downloading chinook data..."
|
||||
cd $WORKDIR
|
||||
git clone https://github.com/lerocha/chinook-database.git
|
||||
cp chinook-database/ChinookDatabase/DataSources/Chinook_Sqlite.sqlite ${WORKPATH}/tests/
|
||||
}
|
||||
|
||||
function start_agent_and_api_server() {
|
||||
echo "Starting CRAG server"
|
||||
docker run -d --runtime=runc --name=kdd-cup-24-crag-service -p=8080:8000 docker.io/aicrowd/kdd-cup-24-crag-mock-api:v0
|
||||
|
||||
echo "Starting Agent services"
|
||||
cd $WORKDIR/GenAIExamples/AgentQnA/docker_compose/amd/gpu/rocm
|
||||
bash launch_agent_service_tgi_rocm.sh
|
||||
@@ -46,63 +42,28 @@ function validate() {
|
||||
}
|
||||
|
||||
function validate_agent_service() {
|
||||
# # test worker rag agent
|
||||
echo "======================Testing worker rag agent======================"
|
||||
export agent_port=$(cat ${WORKPATH}/docker_compose/amd/gpu/WORKER_RAG_AGENT_PORT_tmp)
|
||||
prompt="Tell me about Michael Jackson song Thriller"
|
||||
local CONTENT=$(python3 $WORKDIR/GenAIExamples/AgentQnA/tests/test.py --prompt "$prompt" --agent_role "worker" --ext_port $agent_port)
|
||||
# echo $CONTENT
|
||||
local EXIT_CODE=$(validate "$CONTENT" "Thriller" "rag-agent-endpoint")
|
||||
echo $EXIT_CODE
|
||||
local EXIT_CODE="${EXIT_CODE:0-1}"
|
||||
echo "----------------Test agent ----------------"
|
||||
local CONTENT=$(http_proxy="" curl http://${ip_address}:9095/v1/chat/completions -X POST -H "Content-Type: application/json" -d '{
|
||||
"query": "Tell me about Michael Jackson song thriller"
|
||||
}')
|
||||
local EXIT_CODE=$(validate "$CONTENT" "Thriller" "react-agent-endpoint")
|
||||
docker logs rag-agent-endpoint
|
||||
if [ "$EXIT_CODE" == "1" ]; then
|
||||
docker logs rag-agent-endpoint
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# test worker sql agent
|
||||
echo "======================Testing worker sql agent======================"
|
||||
export agent_port=$(cat ${WORKPATH}/docker_compose/amd/gpu/WORKER_SQL_AGENT_PORT_tmp)
|
||||
prompt="How many employees are there in the company?"
|
||||
local CONTENT=$(python3 $WORKDIR/GenAIExamples/AgentQnA/tests/test.py --prompt "$prompt" --agent_role "worker" --ext_port $agent_port)
|
||||
local EXIT_CODE=$(validate "$CONTENT" "8" "sql-agent-endpoint")
|
||||
echo $CONTENT
|
||||
# echo $EXIT_CODE
|
||||
local EXIT_CODE="${EXIT_CODE:0-1}"
|
||||
local CONTENT=$(http_proxy="" curl http://${ip_address}:9090/v1/chat/completions -X POST -H "Content-Type: application/json" -d '{
|
||||
"query": "Tell me about Michael Jackson song thriller"
|
||||
}')
|
||||
local EXIT_CODE=$(validate "$CONTENT" "Thriller" "react-agent-endpoint")
|
||||
docker logs react-agent-endpoint
|
||||
if [ "$EXIT_CODE" == "1" ]; then
|
||||
docker logs sql-agent-endpoint
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# test supervisor react agent
|
||||
echo "======================Testing supervisor react agent======================"
|
||||
export agent_port=$(cat ${WORKPATH}/docker_compose/amd/gpu/SUPERVISOR_REACT_AGENT_PORT_tmp)
|
||||
local CONTENT=$(python3 $WORKDIR/GenAIExamples/AgentQnA/tests/test.py --agent_role "supervisor" --ext_port $agent_port --stream)
|
||||
local EXIT_CODE=$(validate "$CONTENT" "Iron" "react-agent-endpoint")
|
||||
# echo $CONTENT
|
||||
echo $EXIT_CODE
|
||||
local EXIT_CODE="${EXIT_CODE:0-1}"
|
||||
if [ "$EXIT_CODE" == "1" ]; then
|
||||
docker logs react-agent-endpoint
|
||||
exit 1
|
||||
fi
|
||||
|
||||
}
|
||||
|
||||
function remove_chinook_data(){
|
||||
echo "Removing chinook data..."
|
||||
cd $WORKDIR
|
||||
if [ -d "chinook-database" ]; then
|
||||
rm -rf chinook-database
|
||||
fi
|
||||
echo "Chinook data removed!"
|
||||
}
|
||||
|
||||
function main() {
|
||||
echo "==================== Prepare data ===================="
|
||||
download_chinook_data
|
||||
echo "==================== Data prepare done ===================="
|
||||
|
||||
echo "==================== Start agent ===================="
|
||||
start_agent_and_api_server
|
||||
echo "==================== Agent started ===================="
|
||||
@@ -112,9 +73,4 @@ function main() {
|
||||
echo "==================== Agent service validated ===================="
|
||||
}
|
||||
|
||||
|
||||
remove_chinook_data
|
||||
|
||||
main
|
||||
|
||||
remove_chinook_data
|
||||
|
||||
@@ -1,77 +1,25 @@
|
||||
# Copyright (C) 2025 Intel Corporation
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import uuid
|
||||
import os
|
||||
|
||||
import requests
|
||||
|
||||
|
||||
def process_request(url, query, is_stream=False):
|
||||
def generate_answer_agent_api(url, prompt):
|
||||
proxies = {"http": ""}
|
||||
content = json.dumps(query) if query is not None else None
|
||||
try:
|
||||
resp = requests.post(url=url, data=content, proxies=proxies, stream=is_stream)
|
||||
if not is_stream:
|
||||
ret = resp.json()["text"]
|
||||
else:
|
||||
for line in resp.iter_lines(decode_unicode=True):
|
||||
print(line)
|
||||
ret = None
|
||||
|
||||
resp.raise_for_status() # Raise an exception for unsuccessful HTTP status codes
|
||||
return ret
|
||||
except requests.exceptions.RequestException as e:
|
||||
ret = f"An error occurred:{e}"
|
||||
return None
|
||||
|
||||
|
||||
def test_worker_agent(args):
|
||||
url = f"http://{args.ip_addr}:{args.ext_port}/v1/chat/completions"
|
||||
query = {"role": "user", "messages": args.prompt, "stream": "false"}
|
||||
ret = process_request(url, query)
|
||||
print("Response: ", ret)
|
||||
|
||||
|
||||
def add_message_and_run(url, user_message, thread_id, stream=False):
|
||||
print("User message: ", user_message)
|
||||
query = {"role": "user", "messages": user_message, "thread_id": thread_id, "stream": stream}
|
||||
ret = process_request(url, query, is_stream=stream)
|
||||
print("Response: ", ret)
|
||||
|
||||
|
||||
def test_chat_completion_multi_turn(args):
|
||||
url = f"http://{args.ip_addr}:{args.ext_port}/v1/chat/completions"
|
||||
thread_id = f"{uuid.uuid4()}"
|
||||
|
||||
# first turn
|
||||
print("===============First turn==================")
|
||||
user_message = "Which artist has the most albums in the database?"
|
||||
add_message_and_run(url, user_message, thread_id, stream=args.stream)
|
||||
print("===============End of first turn==================")
|
||||
|
||||
# second turn
|
||||
print("===============Second turn==================")
|
||||
user_message = "Give me a few examples of the artist's albums?"
|
||||
add_message_and_run(url, user_message, thread_id, stream=args.stream)
|
||||
print("===============End of second turn==================")
|
||||
payload = {
|
||||
"query": prompt,
|
||||
}
|
||||
response = requests.post(url, json=payload, proxies=proxies)
|
||||
answer = response.json()["text"]
|
||||
return answer
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument("--ip_addr", type=str, default="127.0.0.1", help="endpoint ip address")
|
||||
parser.add_argument("--ext_port", type=str, default="9090", help="endpoint port")
|
||||
parser.add_argument("--stream", action="store_true", help="streaming mode")
|
||||
parser.add_argument("--prompt", type=str, help="prompt message")
|
||||
parser.add_argument("--agent_role", type=str, default="supervisor", help="supervisor or worker")
|
||||
args, _ = parser.parse_known_args()
|
||||
|
||||
print(args)
|
||||
|
||||
if args.agent_role == "supervisor":
|
||||
test_chat_completion_multi_turn(args)
|
||||
elif args.agent_role == "worker":
|
||||
test_worker_agent(args)
|
||||
else:
|
||||
raise ValueError("Invalid agent role")
|
||||
ip_address = os.getenv("ip_address", "localhost")
|
||||
agent_port = os.getenv("agent_port", "9095")
|
||||
url = f"http://{ip_address}:{agent_port}/v1/chat/completions"
|
||||
prompt = "Tell me about Michael Jackson song thriller"
|
||||
answer = generate_answer_agent_api(url, prompt)
|
||||
print(answer)
|
||||
|
||||
@@ -1,7 +1,8 @@
|
||||
#!/bin/bash
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
set -xe
|
||||
|
||||
set -e
|
||||
|
||||
WORKPATH=$(dirname "$PWD")
|
||||
export WORKDIR=$WORKPATH/../../
|
||||
@@ -9,22 +10,6 @@ echo "WORKDIR=${WORKDIR}"
|
||||
export ip_address=$(hostname -I | awk '{print $1}')
|
||||
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
|
||||
export TOOLSET_PATH=$WORKDIR/GenAIExamples/AgentQnA/tools/
|
||||
export no_proxy="$no_proxy,rag-agent-endpoint,sql-agent-endpoint,react-agent-endpoint,agent-ui,vllm-gaudi-server,jaeger,grafana,prometheus,127.0.0.1,localhost,0.0.0.0,$ip_address"
|
||||
|
||||
|
||||
function get_genai_comps() {
|
||||
if [ ! -d "GenAIComps" ] ; then
|
||||
git clone --depth 1 --branch ${opea_branch:-"main"} https://github.com/opea-project/GenAIComps.git
|
||||
fi
|
||||
}
|
||||
|
||||
|
||||
function build_agent_docker_image() {
|
||||
cd $WORKDIR/GenAIExamples/AgentQnA/docker_image_build/
|
||||
get_genai_comps
|
||||
echo "Build agent image with --no-cache..."
|
||||
docker compose -f build.yaml build --no-cache
|
||||
}
|
||||
|
||||
function stop_crag() {
|
||||
cid=$(docker ps -aq --filter "name=kdd-cup-24-crag-service")
|
||||
@@ -34,10 +19,15 @@ function stop_crag() {
|
||||
|
||||
function stop_agent_docker() {
|
||||
cd $WORKPATH/docker_compose/intel/hpu/gaudi/
|
||||
docker compose -f $WORKDIR/GenAIExamples/DocIndexRetriever/docker_compose/intel/cpu/xeon/compose.yaml -f compose.yaml down
|
||||
container_list=$(cat compose.yaml | grep container_name | cut -d':' -f2)
|
||||
for container_name in $container_list; do
|
||||
cid=$(docker ps -aq --filter "name=$container_name")
|
||||
echo "Stopping container $container_name"
|
||||
if [[ ! -z "$cid" ]]; then docker rm $cid -f && sleep 1s; fi
|
||||
done
|
||||
}
|
||||
|
||||
function stop_llm(){
|
||||
function stop_tgi(){
|
||||
cd $WORKPATH/docker_compose/intel/hpu/gaudi/
|
||||
container_list=$(cat tgi_gaudi.yaml | grep container_name | cut -d':' -f2)
|
||||
for container_name in $container_list; do
|
||||
@@ -46,14 +36,6 @@ function stop_llm(){
|
||||
if [[ ! -z "$cid" ]]; then docker rm $cid -f && sleep 1s; fi
|
||||
done
|
||||
|
||||
cid=$(docker ps -aq --filter "name=vllm-gaudi-server")
|
||||
echo "Stopping container $cid"
|
||||
if [[ ! -z "$cid" ]]; then docker rm $cid -f && sleep 1s; fi
|
||||
|
||||
cid=$(docker ps -aq --filter "name=test-comps-vllm-gaudi-service")
|
||||
echo "Stopping container $cid"
|
||||
if [[ ! -z "$cid" ]]; then docker rm $cid -f && sleep 1s; fi
|
||||
|
||||
}
|
||||
|
||||
function stop_retrieval_tool() {
|
||||
@@ -70,23 +52,34 @@ function stop_retrieval_tool() {
|
||||
echo "workpath: $WORKPATH"
|
||||
echo "=================== Stop containers ===================="
|
||||
stop_crag
|
||||
stop_tgi
|
||||
stop_agent_docker
|
||||
stop_retrieval_tool
|
||||
|
||||
cd $WORKPATH/tests
|
||||
|
||||
echo "=================== #1 Building docker images===================="
|
||||
build_agent_docker_image
|
||||
bash step1_build_images.sh
|
||||
echo "=================== #1 Building docker images completed===================="
|
||||
|
||||
echo "=================== #4 Start agent, API server, retrieval, and ingest data===================="
|
||||
bash $WORKPATH/tests/step4_launch_and_validate_agent_gaudi.sh
|
||||
echo "=================== #4 Agent, retrieval test passed ===================="
|
||||
echo "=================== #2 Start retrieval tool===================="
|
||||
bash step2_start_retrieval_tool.sh
|
||||
echo "=================== #2 Retrieval tool started===================="
|
||||
|
||||
echo "=================== #3 Ingest data and validate retrieval===================="
|
||||
bash step3_ingest_data_and_validate_retrieval.sh
|
||||
echo "=================== #3 Data ingestion and validation completed===================="
|
||||
|
||||
echo "=================== #4 Start agent and API server===================="
|
||||
bash step4_launch_and_validate_agent_tgi.sh
|
||||
echo "=================== #4 Agent test passed ===================="
|
||||
|
||||
echo "=================== #5 Stop agent and API server===================="
|
||||
stop_crag
|
||||
stop_agent_docker
|
||||
stop_retrieval_tool
|
||||
echo "=================== #5 Agent and API server stopped===================="
|
||||
|
||||
echo y | docker system prune
|
||||
|
||||
echo "ALL DONE!!"
|
||||
echo "ALL DONE!"
|
||||
|
||||
@@ -2,16 +2,14 @@
|
||||
# Copyright (C) 2024 Advanced Micro Devices, Inc.
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
set -xe
|
||||
set -e
|
||||
|
||||
WORKPATH=$(dirname "$PWD")
|
||||
ls $WORKPATH
|
||||
export WORKDIR=$WORKPATH/../../
|
||||
echo "WORKDIR=${WORKDIR}"
|
||||
export ip_address=$(hostname -I | awk '{print $1}')
|
||||
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
|
||||
export TOOLSET_PATH=$WORKPATH/tools/
|
||||
export MODEL_CACHE="./data"
|
||||
export TOOLSET_PATH=$WORKDIR/GenAIExamples/AgentQnA/tools/
|
||||
|
||||
function stop_crag() {
|
||||
cid=$(docker ps -aq --filter "name=kdd-cup-24-crag-service")
|
||||
@@ -21,7 +19,13 @@ function stop_crag() {
|
||||
|
||||
function stop_agent_docker() {
|
||||
cd $WORKPATH/docker_compose/amd/gpu/rocm
|
||||
bash stop_agent_service_tgi_rocm.sh
|
||||
# docker compose -f compose.yaml down
|
||||
container_list=$(cat compose.yaml | grep container_name | cut -d':' -f2)
|
||||
for container_name in $container_list; do
|
||||
cid=$(docker ps -aq --filter "name=$container_name")
|
||||
echo "Stopping container $container_name"
|
||||
if [[ ! -z "$cid" ]]; then docker rm $cid -f && sleep 1s; fi
|
||||
done
|
||||
}
|
||||
|
||||
function stop_retrieval_tool() {
|
||||
@@ -68,4 +72,4 @@ echo "=================== #5 Agent and API server stopped===================="
|
||||
|
||||
echo y | docker system prune
|
||||
|
||||
echo "ALL DONE!!"
|
||||
echo "ALL DONE!"
|
||||
|
||||
@@ -1,66 +0,0 @@
|
||||
#!/bin/bash
|
||||
# Copyright (C) 2024 Advanced Micro Devices, Inc.
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
set -e
|
||||
|
||||
WORKPATH=$(dirname "$PWD")
|
||||
export LOG_PATH=${WORKPATH}
|
||||
export WORKDIR=${WORKPATH}/../../
|
||||
echo "WORKDIR=${WORKDIR}"
|
||||
export ip_address=$(hostname -I | awk '{print $1}')
|
||||
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
|
||||
export TOOLSET_PATH=$WORKPATH/tools/
|
||||
export MODEL_CACHE="./data"
|
||||
|
||||
function stop_crag() {
|
||||
cid=$(docker ps -aq --filter "name=kdd-cup-24-crag-service")
|
||||
echo "Stopping container kdd-cup-24-crag-service with cid $cid"
|
||||
if [[ ! -z "$cid" ]]; then docker rm $cid -f && sleep 1s; fi
|
||||
}
|
||||
|
||||
function stop_agent_docker() {
|
||||
cd $WORKPATH/docker_compose/amd/gpu/rocm
|
||||
bash stop_agent_service_vllm_rocm.sh
|
||||
}
|
||||
|
||||
function stop_retrieval_tool() {
|
||||
echo "Stopping Retrieval tool"
|
||||
local RETRIEVAL_TOOL_PATH=$WORKDIR/GenAIExamples/DocIndexRetriever
|
||||
cd $RETRIEVAL_TOOL_PATH/docker_compose/intel/cpu/xeon/
|
||||
docker compose -f compose.yaml down
|
||||
}
|
||||
|
||||
echo "workpath: $WORKPATH"
|
||||
echo "=================== Stop containers ===================="
|
||||
stop_crag
|
||||
stop_agent_docker
|
||||
stop_retrieval_tool
|
||||
|
||||
cd $WORKPATH/tests
|
||||
|
||||
echo "=================== #1 Building docker images===================="
|
||||
bash step1_build_images_rocm_vllm.sh
|
||||
echo "=================== #1 Building docker images completed===================="
|
||||
|
||||
echo "=================== #2 Start retrieval tool===================="
|
||||
bash step2_start_retrieval_tool_rocm_vllm.sh
|
||||
echo "=================== #2 Retrieval tool started===================="
|
||||
|
||||
echo "=================== #3 Ingest data and validate retrieval===================="
|
||||
bash step3_ingest_data_and_validate_retrieval_rocm_vllm.sh
|
||||
echo "=================== #3 Data ingestion and validation completed===================="
|
||||
|
||||
echo "=================== #4 Start agent and API server===================="
|
||||
bash step4_launch_and_validate_agent_rocm_vllm.sh
|
||||
echo "=================== #4 Agent test passed ===================="
|
||||
|
||||
echo "=================== #5 Stop agent and API server===================="
|
||||
stop_crag
|
||||
stop_agent_docker
|
||||
stop_retrieval_tool
|
||||
echo "=================== #5 Agent and API server stopped===================="
|
||||
|
||||
echo y | docker system prune
|
||||
|
||||
echo "ALL DONE!!"
|
||||
@@ -2,7 +2,7 @@
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
search_knowledge_base:
|
||||
description: Search a knowledge base for a given query. Returns text related to the query.
|
||||
description: Search knowledge base for a given query. Returns text related to the query.
|
||||
callable_api: tools.py:search_knowledge_base
|
||||
args_schema:
|
||||
query:
|
||||
@@ -10,15 +10,6 @@ search_knowledge_base:
|
||||
description: query
|
||||
return_output: retrieved_data
|
||||
|
||||
search_artist_database:
|
||||
description: Search a SQL database on artists and their music with a natural language query. Returns text related to the query.
|
||||
callable_api: tools.py:search_sql_database
|
||||
args_schema:
|
||||
query:
|
||||
type: str
|
||||
description: natural language query
|
||||
return_output: retrieved_data
|
||||
|
||||
get_artist_birth_place:
|
||||
description: Get the birth place of an artist.
|
||||
callable_api: tools.py:get_artist_birth_place
|
||||
|
||||
@@ -1,77 +0,0 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
search_web_base:
|
||||
description: Search a web base for a given query. Returns text related to the query.
|
||||
callable_api: tools.py:search_web_base
|
||||
args_schema:
|
||||
query:
|
||||
type: str
|
||||
description: query
|
||||
return_output: retrieved_data
|
||||
|
||||
search_knowledge_base:
|
||||
description: Search a knowledge base for a given query. Returns text related to the query.
|
||||
callable_api: tools.py:search_knowledge_base
|
||||
args_schema:
|
||||
query:
|
||||
type: str
|
||||
description: query
|
||||
return_output: retrieved_data
|
||||
|
||||
search_artist_database:
|
||||
description: Search a SQL database on artists and their music with a natural language query. Returns text related to the query.
|
||||
callable_api: tools.py:search_sql_database
|
||||
args_schema:
|
||||
query:
|
||||
type: str
|
||||
description: natural language query
|
||||
return_output: retrieved_data
|
||||
|
||||
get_artist_birth_place:
|
||||
description: Get the birth place of an artist.
|
||||
callable_api: tools.py:get_artist_birth_place
|
||||
args_schema:
|
||||
artist_name:
|
||||
type: str
|
||||
description: artist name
|
||||
return_output: birth_place
|
||||
|
||||
get_billboard_rank_date:
|
||||
description: Get Billboard ranking for a specific rank and date.
|
||||
callable_api: tools.py:get_billboard_rank_date
|
||||
args_schema:
|
||||
rank:
|
||||
type: int
|
||||
description: the rank of interest, for example 1 for top 1
|
||||
date:
|
||||
type: str
|
||||
description: date
|
||||
return_output: billboard_info
|
||||
|
||||
get_song_release_date:
|
||||
description: Get the release date of a song.
|
||||
callable_api: tools.py:get_song_release_date
|
||||
args_schema:
|
||||
song_name:
|
||||
type: str
|
||||
description: song name
|
||||
return_output: release_date
|
||||
|
||||
get_members:
|
||||
description: Get the member list of a band.
|
||||
callable_api: tools.py:get_members
|
||||
args_schema:
|
||||
band_name:
|
||||
type: str
|
||||
description: band name
|
||||
return_output: members
|
||||
|
||||
get_grammy_best_artist_by_year:
|
||||
description: Get the Grammy Best New Artist for a specific year.
|
||||
callable_api: tools.py:get_grammy_best_artist_by_year
|
||||
args_schema:
|
||||
year:
|
||||
type: int
|
||||
description: year
|
||||
return_output: grammy_best_new_artist
|
||||
@@ -4,62 +4,22 @@
|
||||
import os
|
||||
|
||||
import requests
|
||||
from comps.cores.telemetry.opea_telemetry import opea_telemetry, tracer
|
||||
from tools.pycragapi import CRAG
|
||||
|
||||
|
||||
@opea_telemetry
|
||||
def search_web_base(query: str) -> str:
|
||||
import os
|
||||
|
||||
from langchain_core.tools import Tool
|
||||
from langchain_google_community import GoogleSearchAPIWrapper
|
||||
|
||||
search = GoogleSearchAPIWrapper()
|
||||
|
||||
tool = Tool(
|
||||
name="google_search",
|
||||
description="Search Google for recent results.",
|
||||
func=search.run,
|
||||
)
|
||||
|
||||
response = tool.run(query)
|
||||
return response
|
||||
|
||||
|
||||
@opea_telemetry
|
||||
def search_knowledge_base(query: str) -> str:
|
||||
"""Search a knowledge base about music and singers for a given query.
|
||||
|
||||
Returns text related to the query.
|
||||
"""
|
||||
"""Search the knowledge base for a specific query."""
|
||||
# use worker agent (DocGrader) to search the knowledge base
|
||||
url = os.environ.get("WORKER_AGENT_URL")
|
||||
print(url)
|
||||
proxies = {"http": ""}
|
||||
payload = {
|
||||
"messages": query,
|
||||
"query": query,
|
||||
}
|
||||
response = requests.post(url, json=payload, proxies=proxies)
|
||||
return response.json()["text"]
|
||||
|
||||
|
||||
@opea_telemetry
|
||||
def search_sql_database(query: str) -> str:
|
||||
"""Search a SQL database on artists and their music with a natural language query.
|
||||
|
||||
Returns text related to the query.
|
||||
"""
|
||||
url = os.environ.get("SQL_AGENT_URL")
|
||||
print(url)
|
||||
proxies = {"http": ""}
|
||||
payload = {
|
||||
"messages": query,
|
||||
}
|
||||
response = requests.post(url, json=payload, proxies=proxies)
|
||||
return response.json()["text"]
|
||||
|
||||
|
||||
@opea_telemetry
|
||||
def get_grammy_best_artist_by_year(year: int) -> dict:
|
||||
"""Get the Grammy Best New Artist for a specific year."""
|
||||
api = CRAG()
|
||||
@@ -67,21 +27,18 @@ def get_grammy_best_artist_by_year(year: int) -> dict:
|
||||
return api.music_grammy_get_best_artist_by_year(year)
|
||||
|
||||
|
||||
@opea_telemetry
|
||||
def get_members(band_name: str) -> dict:
|
||||
"""Get the member list of a band."""
|
||||
api = CRAG()
|
||||
return api.music_get_members(band_name)
|
||||
|
||||
|
||||
@opea_telemetry
|
||||
def get_artist_birth_place(artist_name: str) -> dict:
|
||||
"""Get the birthplace of an artist."""
|
||||
api = CRAG()
|
||||
return api.music_get_artist_birth_place(artist_name)
|
||||
|
||||
|
||||
@opea_telemetry
|
||||
def get_billboard_rank_date(rank: int, date: str = None) -> dict:
|
||||
"""Get Billboard ranking for a specific rank and date."""
|
||||
api = CRAG()
|
||||
@@ -89,7 +46,6 @@ def get_billboard_rank_date(rank: int, date: str = None) -> dict:
|
||||
return api.music_get_billboard_rank_date(rank, date)
|
||||
|
||||
|
||||
@opea_telemetry
|
||||
def get_song_release_date(song_name: str) -> dict:
|
||||
"""Get the release date of a song."""
|
||||
api = CRAG()
|
||||
|
||||
@@ -12,7 +12,7 @@ def search_knowledge_base(query: str) -> str:
|
||||
print(url)
|
||||
proxies = {"http": ""}
|
||||
payload = {
|
||||
"text": query,
|
||||
"messages": query,
|
||||
}
|
||||
response = requests.post(url, json=payload, proxies=proxies)
|
||||
print(response)
|
||||
|
||||
@@ -1,77 +0,0 @@
|
||||
From 799dcc304b3aecf2e2969df47c8dcac16d2267b0 Mon Sep 17 00:00:00 2001
|
||||
From: lkk12014402 <kaokao.lv@intel.com>
|
||||
Date: Fri, 4 Apr 2025 07:40:30 +0000
|
||||
Subject: [PATCH] deal opea agent tool content.
|
||||
|
||||
---
|
||||
backend/open_webui/utils/middleware.py | 54 ++++++++++++++++++++++++++
|
||||
1 file changed, 54 insertions(+)
|
||||
|
||||
diff --git a/backend/open_webui/utils/middleware.py b/backend/open_webui/utils/middleware.py
|
||||
index 289d887df..afa0edf1e 100644
|
||||
--- a/backend/open_webui/utils/middleware.py
|
||||
+++ b/backend/open_webui/utils/middleware.py
|
||||
@@ -1486,6 +1486,60 @@ async def process_chat_response(
|
||||
try:
|
||||
data = json.loads(data)
|
||||
|
||||
+ tool_content_block = []
|
||||
+ if data.get("tool_name"):
|
||||
+ sources.append(
|
||||
+ {
|
||||
+ "source": {
|
||||
+ "name": f"TOOL:{data.get('tool_name')}"},
|
||||
+ "document": [data.get("tool_content")],
|
||||
+ "metadata": [{
|
||||
+ "source": f"TOOL:{data.get('tool_name')}"}],
|
||||
+ }
|
||||
+ )
|
||||
+ events.append({"sources": sources})
|
||||
+
|
||||
+ await event_emitter(
|
||||
+ {
|
||||
+ "type": "chat:completion",
|
||||
+ "data": {"sources": sources},
|
||||
+ }
|
||||
+ )
|
||||
+ tool_content_block = [
|
||||
+ {
|
||||
+ "type": "tool_calls",
|
||||
+ "content": [
|
||||
+ {"id": data.get('tool_name'), "function": {"name": data.get('tool_name')}}
|
||||
+ ]
|
||||
+ }
|
||||
+ ]
|
||||
+
|
||||
+ await event_emitter(
|
||||
+ {
|
||||
+ "type": "chat:completion",
|
||||
+ "data": {
|
||||
+ "content": serialize_content_blocks(tool_content_block),
|
||||
+ },
|
||||
+ }
|
||||
+ )
|
||||
+
|
||||
+ tool_content_block = [
|
||||
+ {
|
||||
+ "type": "tool_calls",
|
||||
+ "content": [
|
||||
+ {"id": data.get('tool_name'), "function": {"name": data.get('tool_name')}}
|
||||
+ ],
|
||||
+ "results": [
|
||||
+ {"tool_call_id": data.get('tool_name'), "content": data.get("tool_content")}
|
||||
+ ]
|
||||
+ },
|
||||
+ {
|
||||
+ "type": "text",
|
||||
+ "content": "",
|
||||
+ }
|
||||
+ ]
|
||||
+ content_blocks.extend(tool_content_block)
|
||||
+
|
||||
data, _ = await process_filter_functions(
|
||||
request=request,
|
||||
filter_functions=filter_functions,
|
||||
--
|
||||
2.34.1
|
||||
|
||||
@@ -1,9 +1,32 @@
|
||||
|
||||
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
ARG BASE_TAG=latest
|
||||
FROM opea/comps-base:$BASE_TAG
|
||||
FROM python:3.11-slim
|
||||
|
||||
COPY ./audioqna.py $HOME/audioqna.py
|
||||
RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \
|
||||
libgl1-mesa-glx \
|
||||
libjemalloc-dev \
|
||||
git
|
||||
|
||||
RUN useradd -m -s /bin/bash user && \
|
||||
mkdir -p /home/user && \
|
||||
chown -R user /home/user/
|
||||
|
||||
WORKDIR /home/user/
|
||||
RUN git clone https://github.com/opea-project/GenAIComps.git
|
||||
|
||||
WORKDIR /home/user/GenAIComps
|
||||
RUN pip install --no-cache-dir --upgrade pip setuptools && \
|
||||
pip install --no-cache-dir -r /home/user/GenAIComps/requirements.txt
|
||||
|
||||
COPY ./audioqna.py /home/user/audioqna.py
|
||||
|
||||
ENV PYTHONPATH=$PYTHONPATH:/home/user/GenAIComps
|
||||
|
||||
USER user
|
||||
|
||||
WORKDIR /home/user
|
||||
|
||||
ENTRYPOINT ["python", "audioqna.py"]
|
||||
|
||||
@@ -1,9 +1,32 @@
|
||||
|
||||
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
ARG BASE_TAG=latest
|
||||
FROM opea/comps-base:$BASE_TAG
|
||||
FROM python:3.11-slim
|
||||
|
||||
COPY ./audioqna_multilang.py $HOME/audioqna_multilang.py
|
||||
RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \
|
||||
libgl1-mesa-glx \
|
||||
libjemalloc-dev \
|
||||
git
|
||||
|
||||
RUN useradd -m -s /bin/bash user && \
|
||||
mkdir -p /home/user && \
|
||||
chown -R user /home/user/
|
||||
|
||||
WORKDIR /home/user/
|
||||
RUN git clone https://github.com/opea-project/GenAIComps.git
|
||||
|
||||
WORKDIR /home/user/GenAIComps
|
||||
RUN pip install --no-cache-dir --upgrade pip setuptools && \
|
||||
pip install --no-cache-dir -r /home/user/GenAIComps/requirements.txt
|
||||
|
||||
COPY ./audioqna_multilang.py /home/user/audioqna_multilang.py
|
||||
|
||||
ENV PYTHONPATH=$PYTHONPATH:/home/user/GenAIComps
|
||||
|
||||
USER user
|
||||
|
||||
WORKDIR /home/user
|
||||
|
||||
ENTRYPOINT ["python", "audioqna_multilang.py"]
|
||||
|
||||
@@ -71,10 +71,6 @@ Refer to the [Gaudi Guide](./docker_compose/intel/hpu/gaudi/README.md) for instr
|
||||
|
||||
Refer to the [Xeon Guide](./docker_compose/intel/cpu/xeon/README.md) for instructions on deploying AudioQnA on Xeon.
|
||||
|
||||
## Deploy using Helm Chart
|
||||
|
||||
Refer to the [AudioQnA helm chart](./kubernetes/helm/README.md) for instructions on deploying AudioQnA on Kubernetes.
|
||||
|
||||
## Supported Models
|
||||
|
||||
### ASR
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
import asyncio
|
||||
import os
|
||||
|
||||
from comps import MegaServiceEndpoint, MicroService, ServiceOrchestrator, ServiceRoleType, ServiceType
|
||||
@@ -9,36 +10,21 @@ from comps.cores.proto.docarray import LLMParams
|
||||
from fastapi import Request
|
||||
|
||||
MEGA_SERVICE_PORT = int(os.getenv("MEGA_SERVICE_PORT", 8888))
|
||||
|
||||
WHISPER_SERVER_HOST_IP = os.getenv("WHISPER_SERVER_HOST_IP", "0.0.0.0")
|
||||
WHISPER_SERVER_PORT = int(os.getenv("WHISPER_SERVER_PORT", 7066))
|
||||
SPEECHT5_SERVER_HOST_IP = os.getenv("SPEECHT5_SERVER_HOST_IP", "0.0.0.0")
|
||||
SPEECHT5_SERVER_PORT = int(os.getenv("SPEECHT5_SERVER_PORT", 7055))
|
||||
LLM_SERVER_HOST_IP = os.getenv("LLM_SERVER_HOST_IP", "0.0.0.0")
|
||||
LLM_SERVER_PORT = int(os.getenv("LLM_SERVER_PORT", 3006))
|
||||
LLM_MODEL_ID = os.getenv("LLM_MODEL_ID", "meta-llama/Meta-Llama-3-8B-Instruct")
|
||||
ASR_SERVICE_HOST_IP = os.getenv("ASR_SERVICE_HOST_IP", "0.0.0.0")
|
||||
ASR_SERVICE_PORT = int(os.getenv("ASR_SERVICE_PORT", 9099))
|
||||
LLM_SERVICE_HOST_IP = os.getenv("LLM_SERVICE_HOST_IP", "0.0.0.0")
|
||||
LLM_SERVICE_PORT = int(os.getenv("LLM_SERVICE_PORT", 9000))
|
||||
TTS_SERVICE_HOST_IP = os.getenv("TTS_SERVICE_HOST_IP", "0.0.0.0")
|
||||
TTS_SERVICE_PORT = int(os.getenv("TTS_SERVICE_PORT", 9088))
|
||||
|
||||
|
||||
def align_inputs(self, inputs, cur_node, runtime_graph, llm_parameters_dict, **kwargs):
|
||||
if self.services[cur_node].service_type == ServiceType.LLM:
|
||||
# convert TGI/vLLM to unified OpenAI /v1/chat/completions format
|
||||
next_inputs = {}
|
||||
next_inputs["model"] = LLM_MODEL_ID
|
||||
next_inputs["messages"] = [{"role": "user", "content": inputs["asr_result"]}]
|
||||
next_inputs["max_tokens"] = llm_parameters_dict["max_tokens"]
|
||||
next_inputs["top_p"] = llm_parameters_dict["top_p"]
|
||||
next_inputs["stream"] = inputs["stream"] # False as default
|
||||
next_inputs["frequency_penalty"] = inputs["frequency_penalty"]
|
||||
# next_inputs["presence_penalty"] = inputs["presence_penalty"]
|
||||
# next_inputs["repetition_penalty"] = inputs["repetition_penalty"]
|
||||
next_inputs["temperature"] = inputs["temperature"]
|
||||
inputs = next_inputs
|
||||
elif self.services[cur_node].service_type == ServiceType.TTS:
|
||||
next_inputs = {}
|
||||
next_inputs["text"] = inputs["choices"][0]["message"]["content"]
|
||||
next_inputs["voice"] = kwargs["voice"]
|
||||
inputs = next_inputs
|
||||
return inputs
|
||||
if self.services[cur_node].service_type == ServiceType.TTS:
|
||||
new_inputs = {}
|
||||
new_inputs["text"] = inputs["choices"][0]["text"]
|
||||
return new_inputs
|
||||
else:
|
||||
return inputs
|
||||
|
||||
|
||||
class AudioQnAService:
|
||||
@@ -47,31 +33,30 @@ class AudioQnAService:
|
||||
self.port = port
|
||||
ServiceOrchestrator.align_inputs = align_inputs
|
||||
self.megaservice = ServiceOrchestrator()
|
||||
|
||||
self.endpoint = str(MegaServiceEndpoint.AUDIO_QNA)
|
||||
|
||||
def add_remote_service(self):
|
||||
asr = MicroService(
|
||||
name="asr",
|
||||
host=WHISPER_SERVER_HOST_IP,
|
||||
port=WHISPER_SERVER_PORT,
|
||||
endpoint="/v1/asr",
|
||||
host=ASR_SERVICE_HOST_IP,
|
||||
port=ASR_SERVICE_PORT,
|
||||
endpoint="/v1/audio/transcriptions",
|
||||
use_remote_service=True,
|
||||
service_type=ServiceType.ASR,
|
||||
)
|
||||
llm = MicroService(
|
||||
name="llm",
|
||||
host=LLM_SERVER_HOST_IP,
|
||||
port=LLM_SERVER_PORT,
|
||||
host=LLM_SERVICE_HOST_IP,
|
||||
port=LLM_SERVICE_PORT,
|
||||
endpoint="/v1/chat/completions",
|
||||
use_remote_service=True,
|
||||
service_type=ServiceType.LLM,
|
||||
)
|
||||
tts = MicroService(
|
||||
name="tts",
|
||||
host=SPEECHT5_SERVER_HOST_IP,
|
||||
port=SPEECHT5_SERVER_PORT,
|
||||
endpoint="/v1/tts",
|
||||
host=TTS_SERVICE_HOST_IP,
|
||||
port=TTS_SERVICE_PORT,
|
||||
endpoint="/v1/audio/speech",
|
||||
use_remote_service=True,
|
||||
service_type=ServiceType.TTS,
|
||||
)
|
||||
@@ -92,16 +77,14 @@ class AudioQnAService:
|
||||
frequency_penalty=chat_request.frequency_penalty if chat_request.frequency_penalty else 0.0,
|
||||
presence_penalty=chat_request.presence_penalty if chat_request.presence_penalty else 0.0,
|
||||
repetition_penalty=chat_request.repetition_penalty if chat_request.repetition_penalty else 1.03,
|
||||
stream=False, # TODO add stream LLM output as input to TTS
|
||||
streaming=False, # TODO add streaming LLM output as input to TTS
|
||||
)
|
||||
result_dict, runtime_graph = await self.megaservice.schedule(
|
||||
initial_inputs={"audio": chat_request.audio},
|
||||
llm_parameters=parameters,
|
||||
voice=chat_request.voice if hasattr(chat_request, "voice") else "default",
|
||||
initial_inputs={"byte_str": chat_request.audio}, llm_parameters=parameters
|
||||
)
|
||||
|
||||
last_node = runtime_graph.all_leaves()[-1]
|
||||
response = result_dict[last_node]["tts_result"]
|
||||
response = result_dict[last_node]["byte_str"]
|
||||
|
||||
return response
|
||||
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
import asyncio
|
||||
import base64
|
||||
import os
|
||||
|
||||
@@ -17,19 +18,22 @@ GPT_SOVITS_SERVER_HOST_IP = os.getenv("GPT_SOVITS_SERVER_HOST_IP", "0.0.0.0")
|
||||
GPT_SOVITS_SERVER_PORT = int(os.getenv("GPT_SOVITS_SERVER_PORT", 9088))
|
||||
LLM_SERVER_HOST_IP = os.getenv("LLM_SERVER_HOST_IP", "0.0.0.0")
|
||||
LLM_SERVER_PORT = int(os.getenv("LLM_SERVER_PORT", 8888))
|
||||
LLM_MODEL_ID = os.getenv("LLM_MODEL_ID", "meta-llama/Meta-Llama-3-8B-Instruct")
|
||||
|
||||
|
||||
def align_inputs(self, inputs, cur_node, runtime_graph, llm_parameters_dict, **kwargs):
|
||||
|
||||
if self.services[cur_node].service_type == ServiceType.LLM:
|
||||
print(inputs)
|
||||
if self.services[cur_node].service_type == ServiceType.ASR:
|
||||
# {'byte_str': 'UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA'}
|
||||
inputs["audio"] = inputs["byte_str"]
|
||||
del inputs["byte_str"]
|
||||
elif self.services[cur_node].service_type == ServiceType.LLM:
|
||||
# convert TGI/vLLM to unified OpenAI /v1/chat/completions format
|
||||
next_inputs = {}
|
||||
next_inputs["model"] = LLM_MODEL_ID
|
||||
next_inputs["model"] = "tgi" # specifically clarify the fake model to make the format unified
|
||||
next_inputs["messages"] = [{"role": "user", "content": inputs["asr_result"]}]
|
||||
next_inputs["max_tokens"] = llm_parameters_dict["max_tokens"]
|
||||
next_inputs["top_p"] = llm_parameters_dict["top_p"]
|
||||
next_inputs["stream"] = inputs["stream"] # False as default
|
||||
next_inputs["stream"] = inputs["streaming"] # False as default
|
||||
next_inputs["frequency_penalty"] = inputs["frequency_penalty"]
|
||||
# next_inputs["presence_penalty"] = inputs["presence_penalty"]
|
||||
# next_inputs["repetition_penalty"] = inputs["repetition_penalty"]
|
||||
@@ -104,10 +108,10 @@ class AudioQnAService:
|
||||
frequency_penalty=chat_request.frequency_penalty if chat_request.frequency_penalty else 0.0,
|
||||
presence_penalty=chat_request.presence_penalty if chat_request.presence_penalty else 0.0,
|
||||
repetition_penalty=chat_request.repetition_penalty if chat_request.repetition_penalty else 1.03,
|
||||
stream=False, # TODO add stream LLM output as input to TTS
|
||||
streaming=False, # TODO add streaming LLM output as input to TTS
|
||||
)
|
||||
result_dict, runtime_graph = await self.megaservice.schedule(
|
||||
initial_inputs={"audio": chat_request.audio}, llm_parameters=parameters
|
||||
initial_inputs={"byte_str": chat_request.audio}, llm_parameters=parameters
|
||||
)
|
||||
|
||||
last_node = runtime_graph.all_leaves()[-1]
|
||||
|
||||
@@ -14,12 +14,12 @@ We evaluate the WER (Word Error Rate) metric of the ASR microservice.
|
||||
|
||||
### Launch ASR microservice
|
||||
|
||||
Launch the ASR microserice with the following commands. For more details please refer to [doc](https://github.com/opea-project/GenAIComps/tree/main/comps/asr/src/README.md).
|
||||
Launch the ASR microserice with the following commands. For more details please refer to [doc](https://github.com/opea-project/GenAIComps/tree/main/comps/asr/whisper/README.md).
|
||||
|
||||
```bash
|
||||
git clone https://github.com/opea-project/GenAIComps
|
||||
cd GenAIComps
|
||||
docker build -t opea/whisper:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/asr/src/Dockerfile .
|
||||
docker build -t opea/whisper:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/asr/whisper/Dockerfile .
|
||||
# change the name of model by editing model_name_or_path you want to evaluate
|
||||
docker run -p 7066:7066 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy opea/whisper:latest --model_name_or_path "openai/whisper-tiny"
|
||||
```
|
||||
|
||||
@@ -40,7 +40,7 @@ test_cases:
|
||||
top_k: 10
|
||||
top_p: 0.95
|
||||
repetition_penalty: 1.03
|
||||
stream: true
|
||||
streaming: true
|
||||
llmserve:
|
||||
run_test: true
|
||||
service_name: "llm-svc" # Replace with your service name
|
||||
|
||||
@@ -3,331 +3,132 @@
|
||||
This document outlines the deployment process for an AudioQnA application utilizing the [GenAIComps](https://github.com/opea-project/GenAIComps.git) microservice
|
||||
pipeline on a server with the AMD ROCm GPU platform.
|
||||
|
||||
## Build Docker Images
|
||||
## 🚀 Build Docker images
|
||||
|
||||
### 1. Build Docker Image
|
||||
|
||||
- #### Create application install directory and go to it:
|
||||
|
||||
```bash
|
||||
mkdir ~/audioqna-install && cd audioqna-install
|
||||
```
|
||||
|
||||
- #### Clone the repository GenAIExamples (the default repository branch "main" is used here):
|
||||
|
||||
```bash
|
||||
git clone https://github.com/opea-project/GenAIExamples.git
|
||||
```
|
||||
|
||||
If you need to use a specific branch/tag of the GenAIExamples repository, use the following command instead (replace v1.3 with the branch/tag you need):
|
||||
|
||||
```bash
|
||||
git clone https://github.com/opea-project/GenAIExamples.git && cd GenAIExamples && git checkout v1.3
|
||||
```
|
||||
|
||||
We remind you that when using a specific version of the code, you need to use the README from this version:
|
||||
|
||||
- #### Go to build directory:
|
||||
|
||||
```bash
|
||||
cd ~/audioqna-install/GenAIExamples/AudioQnA/docker_image_build
|
||||
```
|
||||
|
||||
- Cleaning up the GenAIComps repository if it was previously cloned in this directory.
|
||||
This is necessary if the build was performed earlier and the GenAIComps folder exists and is not empty:
|
||||
|
||||
```bash
|
||||
echo Y | rm -R GenAIComps
|
||||
```
|
||||
|
||||
- #### Clone the repository GenAIComps (the default repository branch "main" is used here):
|
||||
### 1. Source Code install GenAIComps
|
||||
|
||||
```bash
|
||||
git clone https://github.com/opea-project/GenAIComps.git
|
||||
cd GenAIComps
|
||||
```
|
||||
|
||||
We remind you that when using a specific version of the code, you need to use the README from this version.
|
||||
|
||||
- #### Setting the list of images for the build (from the build file.yaml)
|
||||
|
||||
If you want to deploy a vLLM-based or TGI-based application, then the set of services is installed as follows:
|
||||
|
||||
#### vLLM-based application
|
||||
|
||||
```bash
|
||||
service_list="vllm-rocm whisper speecht5 audioqna audioqna-ui"
|
||||
```
|
||||
|
||||
#### TGI-based application
|
||||
|
||||
```bash
|
||||
service_list="whisper speecht5 audioqna audioqna-ui"
|
||||
```
|
||||
|
||||
- #### Optional. Pull TGI Docker Image (Do this if you want to use TGI)
|
||||
|
||||
```bash
|
||||
docker pull ghcr.io/huggingface/text-generation-inference:2.3.1-rocm
|
||||
```
|
||||
|
||||
- #### Build Docker Images
|
||||
|
||||
```bash
|
||||
docker compose -f build.yaml build ${service_list} --no-cache
|
||||
```
|
||||
|
||||
After the build, we check the list of images with the command:
|
||||
|
||||
```bash
|
||||
docker image ls
|
||||
```
|
||||
|
||||
The list of images should include:
|
||||
|
||||
##### vLLM-based application:
|
||||
|
||||
- opea/vllm-rocm:latest
|
||||
- opea/whisper:latest
|
||||
- opea/speecht5:latest
|
||||
- opea/audioqna:latest
|
||||
|
||||
##### TGI-based application:
|
||||
|
||||
- ghcr.io/huggingface/text-generation-inference:2.3.1-rocm
|
||||
- opea/whisper:latest
|
||||
- opea/speecht5:latest
|
||||
- opea/audioqna:latest
|
||||
|
||||
---
|
||||
|
||||
## Deploy the AudioQnA Application
|
||||
|
||||
### Docker Compose Configuration for AMD GPUs
|
||||
|
||||
To enable GPU support for AMD GPUs, the following configuration is added to the Docker Compose file:
|
||||
|
||||
- compose_vllm.yaml - for vLLM-based application
|
||||
- compose.yaml - for TGI-based
|
||||
|
||||
```yaml
|
||||
shm_size: 1g
|
||||
devices:
|
||||
- /dev/kfd:/dev/kfd
|
||||
- /dev/dri/:/dev/dri/
|
||||
cap_add:
|
||||
- SYS_PTRACE
|
||||
group_add:
|
||||
- video
|
||||
security_opt:
|
||||
- seccomp:unconfined
|
||||
```
|
||||
|
||||
This configuration forwards all available GPUs to the container. To use a specific GPU, specify its `cardN` and `renderN` device IDs. For example:
|
||||
|
||||
```yaml
|
||||
shm_size: 1g
|
||||
devices:
|
||||
- /dev/kfd:/dev/kfd
|
||||
- /dev/dri/card0:/dev/dri/card0
|
||||
- /dev/dri/render128:/dev/dri/render128
|
||||
cap_add:
|
||||
- SYS_PTRACE
|
||||
group_add:
|
||||
- video
|
||||
security_opt:
|
||||
- seccomp:unconfined
|
||||
```
|
||||
|
||||
**How to Identify GPU Device IDs:**
|
||||
Use AMD GPU driver utilities to determine the correct `cardN` and `renderN` IDs for your GPU.
|
||||
|
||||
### Set deploy environment variables
|
||||
|
||||
#### Setting variables in the operating system environment:
|
||||
|
||||
##### Set variable HUGGINGFACEHUB_API_TOKEN:
|
||||
### 2. Build ASR Image
|
||||
|
||||
```bash
|
||||
### Replace the string 'your_huggingfacehub_token' with your HuggingFacehub repository access token.
|
||||
export HUGGINGFACEHUB_API_TOKEN='your_huggingfacehub_token'
|
||||
docker build -t opea/whisper:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/asr/whisper/dependency/Dockerfile .
|
||||
|
||||
|
||||
docker build -t opea/asr:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/asr/whisper/Dockerfile .
|
||||
```
|
||||
|
||||
#### Set variables value in set_env\*\*\*\*.sh file:
|
||||
|
||||
Go to Docker Compose directory:
|
||||
### 3. Build LLM Image
|
||||
|
||||
```bash
|
||||
cd ~/audioqna-install/GenAIExamples/AudioQnA/docker_compose/amd/gpu/rocm
|
||||
docker build --no-cache -t opea/llm-tgi:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/src/text-generation/Dockerfile .
|
||||
```
|
||||
|
||||
The example uses the Nano text editor. You can use any convenient text editor:
|
||||
Note:
|
||||
For the ROCm Docker Compose example, the AMD-optimized image hosted in the Hugging Face repository is used for the TGI service: ghcr.io/huggingface/text-generation-inference:2.3.1-rocm (https://github.com/huggingface/text-generation-inference)
|
||||
|
||||
#### If you use vLLM
|
||||
### 4. Build TTS Image
|
||||
|
||||
```bash
|
||||
nano set_env_vllm.sh
|
||||
docker build -t opea/speecht5:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/tts/speecht5/dependency/Dockerfile .
|
||||
|
||||
docker build -t opea/tts:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/tts/speecht5/Dockerfile .
|
||||
```
|
||||
|
||||
#### If you use TGI
|
||||
### 6. Build MegaService Docker Image
|
||||
|
||||
To construct the Mega Service, we utilize the [GenAIComps](https://github.com/opea-project/GenAIComps.git) microservice pipeline within the `audioqna.py` Python script. Build the MegaService Docker image using the command below:
|
||||
|
||||
```bash
|
||||
nano set_env.sh
|
||||
git clone https://github.com/opea-project/GenAIExamples.git
|
||||
cd GenAIExamples/AudioQnA/
|
||||
docker build --no-cache -t opea/audioqna:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile .
|
||||
```
|
||||
|
||||
If you are in a proxy environment, also set the proxy-related environment variables:
|
||||
Then run the command `docker images`; you should have the following images ready:
|
||||
|
||||
1. `opea/whisper:latest`
|
||||
2. `opea/asr:latest`
|
||||
3. `opea/llm-tgi:latest`
|
||||
4. `opea/speecht5:latest`
|
||||
5. `opea/tts:latest`
|
||||
6. `opea/audioqna:latest`
|
||||
|
||||
## 🚀 Set the environment variables
|
||||
|
||||
Before starting the services with `docker compose`, you have to recheck the following environment variables.
|
||||
|
||||
```bash
|
||||
export http_proxy="Your_HTTP_Proxy"
|
||||
export https_proxy="Your_HTTPs_Proxy"
|
||||
export host_ip=<your External Public IP> # export host_ip=$(hostname -I | awk '{print $1}')
|
||||
export HUGGINGFACEHUB_API_TOKEN=<your HF token>
|
||||
|
||||
export TGI_LLM_ENDPOINT=http://$host_ip:3006
|
||||
export LLM_MODEL_ID=Intel/neural-chat-7b-v3-3
|
||||
|
||||
export ASR_ENDPOINT=http://$host_ip:7066
|
||||
export TTS_ENDPOINT=http://$host_ip:7055
|
||||
|
||||
export MEGA_SERVICE_HOST_IP=${host_ip}
|
||||
export ASR_SERVICE_HOST_IP=${host_ip}
|
||||
export TTS_SERVICE_HOST_IP=${host_ip}
|
||||
export LLM_SERVICE_HOST_IP=${host_ip}
|
||||
|
||||
export ASR_SERVICE_PORT=3001
|
||||
export TTS_SERVICE_PORT=3002
|
||||
export LLM_SERVICE_PORT=3007
|
||||
```
|
||||
|
||||
Set the values of the variables:
|
||||
Alternatively, use the set_env.sh file to set up the environment variables.
|
||||
|
||||
- **HOST_IP, HOST_IP_EXTERNAL** - These variables are used to configure the name/address of the service in the operating system environment for the application services to interact with each other and with the outside world.
|
||||
Note: Please replace host_ip with your external IP address; do not use localhost.
|
||||
|
||||
If your server uses only an internal address and is not accessible from the Internet, then the values for these two variables will be the same and the value will be equal to the server's internal name/address.
|
||||
Note: To limit access to a subset of GPUs, pass each device individually using one or more `--device /dev/dri/renderD<node>` options, where `<node>` is the card index, starting from 128. (https://rocm.docs.amd.com/projects/install-on-linux/en/latest/how-to/docker.html#docker-restrict-gpus)
|
||||
|
||||
If your server uses only an external, Internet-accessible address, then the values for these two variables will be the same and the value will be equal to the server's external name/address.
|
||||
Example of isolating 1 GPU:
|
||||
|
||||
If your server is located on an internal network, has an internal address, but is accessible from the Internet via a proxy/firewall/load balancer, then the HOST_IP variable will have a value equal to the internal name/address of the server, and the EXTERNAL_HOST_IP variable will have a value equal to the external name/address of the proxy/firewall/load balancer behind which the server is located.
|
||||
- /dev/dri/card0:/dev/dri/card0
|
||||
- /dev/dri/renderD128:/dev/dri/renderD128
|
||||
|
||||
We set these values in the file set_env\*\*\*\*.sh
|
||||
Example of isolating 2 GPUs:
|
||||
|
||||
- **Variables with names like "\*\*\*\*\*\*\_PORT"** - These variables set the IP port numbers for establishing network connections to the application services.
|
||||
The values shown in the file set_env.sh or set_env_vllm.sh are the values used for the development and testing of the application, and they are configured for the environment in which the development was performed. These values must be configured in accordance with the rules of network access to your environment's server, and must not overlap with the IP ports of other applications that are already in use.
|
||||
- /dev/dri/card0:/dev/dri/card0
|
||||
- /dev/dri/renderD128:/dev/dri/renderD128
|
||||
- /dev/dri/card1:/dev/dri/card1
|
||||
- /dev/dri/renderD129:/dev/dri/renderD129
|
||||
|
||||
#### Set variables with script set_env\*\*\*\*.sh
|
||||
Please find more information about accessing and restricting AMD GPUs in the link (https://rocm.docs.amd.com/projects/install-on-linux/en/latest/how-to/docker.html#docker-restrict-gpus)
|
||||
|
||||
#### If you use vLLM
|
||||
## 🚀 Start the MegaService
|
||||
|
||||
```bash
|
||||
. set_env_vllm.sh
|
||||
cd GenAIExamples/AudioQnA/docker_compose/amd/gpu/rocm/
|
||||
docker compose up -d
|
||||
```
|
||||
|
||||
#### If you use TGI
|
||||
In the following cases, you can build the Docker images from source yourself:
|
||||
|
||||
```bash
|
||||
. set_env.sh
|
||||
```
|
||||
- Failed to download the docker image.
|
||||
- If you want to use a specific version of Docker image.
|
||||
|
||||
### Start the services:
|
||||
Please refer to the 'Build Docker Images' section below.
|
||||
|
||||
#### If you use vLLM
|
||||
|
||||
```bash
|
||||
docker compose -f compose_vllm.yaml up -d
|
||||
```
|
||||
|
||||
#### If you use TGI
|
||||
|
||||
```bash
|
||||
docker compose -f compose.yaml up -d
|
||||
```
|
||||
|
||||
All containers should be running and should not restart:
|
||||
|
||||
##### If you use vLLM:
|
||||
|
||||
- audioqna-vllm-service
|
||||
- whisper-service
|
||||
- speecht5-service
|
||||
- audioqna-backend-server
|
||||
- audioqna-ui-server
|
||||
|
||||
##### If you use TGI:
|
||||
|
||||
- audioqna-tgi-service
|
||||
- whisper-service
|
||||
- speecht5-service
|
||||
- audioqna-backend-server
|
||||
- audioqna-ui-server
|
||||
|
||||
---
|
||||
|
||||
## Validate the Services
|
||||
|
||||
### 1. Validate the vLLM/TGI Service
|
||||
|
||||
#### If you use vLLM:
|
||||
|
||||
```bash
|
||||
DATA='{"model": "Intel/neural-chat-7b-v3-3", '\
|
||||
'"messages": [{"role": "user", "content": "What is Deep Learning?"}], "max_tokens": 256}'
|
||||
|
||||
curl http://${HOST_IP}:${AUDIOQNA_VLLM_SERVICE_PORT}/v1/chat/completions \
|
||||
-X POST \
|
||||
-d "$DATA" \
|
||||
-H 'Content-Type: application/json'
|
||||
```
|
||||
|
||||
Checking the response from the service. The response should be similar to JSON:
|
||||
|
||||
```json
|
||||
{
|
||||
"id": "chatcmpl-142f34ef35b64a8db3deedd170fed951",
|
||||
"object": "chat.completion",
|
||||
"created": 1742270316,
|
||||
"model": "Intel/neural-chat-7b-v3-3",
|
||||
"choices": [
|
||||
{
|
||||
"index": 0,
|
||||
"message": {
|
||||
"role": "assistant",
|
||||
"content": "",
|
||||
"tool_calls": []
|
||||
},
|
||||
"logprobs": null,
|
||||
"finish_reason": "length",
|
||||
"stop_reason": null
|
||||
}
|
||||
],
|
||||
"usage": { "prompt_tokens": 66, "total_tokens": 322, "completion_tokens": 256, "prompt_tokens_details": null },
|
||||
"prompt_logprobs": null
|
||||
}
|
||||
```
|
||||
|
||||
If the service response has a meaningful response in the value of the "choices.message.content" key,
|
||||
then we consider the vLLM service to be successfully launched
|
||||
|
||||
#### If you use TGI:
|
||||
|
||||
```bash
|
||||
DATA='{"inputs":"What is Deep Learning?",'\
|
||||
'"parameters":{"max_new_tokens":256,"do_sample": true}}'
|
||||
|
||||
curl http://${HOST_IP}:${AUDIOQNA_TGI_SERVICE_PORT}/generate \
|
||||
-X POST \
|
||||
-d "$DATA" \
|
||||
-H 'Content-Type: application/json'
|
||||
```
|
||||
|
||||
Checking the response from the service. The response should be similar to JSON:
|
||||
|
||||
```json
|
||||
{
|
||||
"generated_text": " "
|
||||
}
|
||||
```
|
||||
|
||||
If the service response has a meaningful response in the value of the "generated_text" key,
|
||||
then we consider the TGI service to be successfully launched
|
||||
|
||||
### 2. Validate MegaServices
|
||||
## 🚀 Consume the AudioQnA Service
|
||||
|
||||
Test the AudioQnA megaservice by recording a .wav file, encoding the file into the base64 format, and then sending the
|
||||
base64 string to the megaservice endpoint. The megaservice will return a spoken response as a base64 string. To listen
|
||||
to the response, decode the base64 string and save it as a .wav file.
|
||||
|
||||
```bash
|
||||
# voice can be "default" or "male"
|
||||
curl http://${host_ip}:3008/v1/audioqna \
|
||||
-X POST \
|
||||
-d '{"audio": "UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA", "max_tokens":64, "voice":"default"}' \
|
||||
-d '{"audio": "UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA", "max_tokens":64}' \
|
||||
-H 'Content-Type: application/json' | sed 's/^"//;s/"$//' | base64 -d > output.wav
|
||||
```
|
||||
|
||||
### 3. Validate MicroServices
|
||||
## 🚀 Test MicroServices
|
||||
|
||||
```bash
|
||||
# whisper service
|
||||
@@ -336,25 +137,34 @@ curl http://${host_ip}:7066/v1/asr \
|
||||
-d '{"audio": "UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA"}' \
|
||||
-H 'Content-Type: application/json'
|
||||
|
||||
# asr microservice
|
||||
curl http://${host_ip}:3001/v1/audio/transcriptions \
|
||||
-X POST \
|
||||
-d '{"byte_str": "UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA"}' \
|
||||
-H 'Content-Type: application/json'
|
||||
|
||||
# tgi service
|
||||
curl http://${host_ip}:3006/generate \
|
||||
-X POST \
|
||||
-d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":17, "do_sample": true}}' \
|
||||
-H 'Content-Type: application/json'
|
||||
|
||||
# llm microservice
|
||||
curl http://${host_ip}:3007/v1/chat/completions \
|
||||
-X POST \
|
||||
-d '{"query":"What is Deep Learning?","max_tokens":17,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":false}' \
|
||||
-H 'Content-Type: application/json'
|
||||
|
||||
# speecht5 service
|
||||
curl http://${host_ip}:7055/v1/tts \
|
||||
-X POST \
|
||||
-d '{"text": "Who are you?"}' \
|
||||
-H 'Content-Type: application/json'
|
||||
```
|
||||
|
||||
### 4. Stop application
|
||||
|
||||
#### If you use vLLM
|
||||
|
||||
```bash
|
||||
cd ~/audioqna-install/GenAIExamples/AudioQnA/docker_compose/amd/gpu/rocm
|
||||
docker compose -f compose_vllm.yaml down
|
||||
```
|
||||
|
||||
#### If you use TGI
|
||||
|
||||
```bash
|
||||
cd ~/audioqna-install/GenAIExamples/AudioQnA/docker_compose/amd/gpu/rocm
|
||||
docker compose -f compose.yaml down
|
||||
|
||||
# tts microservice
|
||||
curl http://${host_ip}:3002/v1/audio/speech \
|
||||
-X POST \
|
||||
-d '{"text": "Who are you?"}' \
|
||||
-H 'Content-Type: application/json'
|
||||
|
||||
```
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user