Compare commits: test-night...Fix-sec
39 commits
Commit SHAs:

52a6b22f3f
c8259d47f9
b980d6a34c
2f9959f0a5
51b9d3b975
d9e7264a81
26cb531766
e9153b82bb
0890e94a21
581e954a8d
8a9f3f4351
09d93ecce6
ed918bcef1
1c0b1731c5
22174e68a5
c8abbc4958
7ee6f3657c
11b04b38db
7f55b5a100
bb9ec6e5d2
3fb59a9769
410df80925
8eac02e58b
9f80a18cb5
f2c8e0b4ff
fb53c536a3
26d07019d0
bd6726c53a
a0bdf8eab2
99f2f940b6
2596671d3f
7ffb4107e6
7590b055aa
4efb1e0833
ebb7c24ca8
bfefdfad34
b467a13ec3
05011ebaac
7bb05585b6
.github/env/_build_image.sh (vendored, new file, +5)

@@ -0,0 +1,5 @@
+# Copyright (C) 2025 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+export VLLM_VER=v0.8.3
+export VLLM_FORK_VER=v0.6.6.post1+Gaudi-1.20.0
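This env file centralizes the pinned vLLM versions so CI no longer hard-codes them per workflow. A minimal sketch of consuming it locally (the sourcing and clone pattern are taken from the workflow change below; the echo is illustrative only):

```bash
# Sketch: source the pinned versions, then clone the matching vLLM tag.
source .github/env/_build_image.sh
echo "Building against vLLM ${VLLM_VER} and vLLM-fork ${VLLM_FORK_VER}"
git clone -b "${VLLM_VER}" --single-branch https://github.com/vllm-project/vllm.git
```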
.github/workflows/_build_image.yml (vendored, 11 changes)

@@ -75,17 +75,12 @@ jobs:
       run: |
         cd ${{ github.workspace }}/${{ inputs.example }}/docker_image_build
         docker_compose_path=${{ github.workspace }}/${{ inputs.example }}/docker_image_build/build.yaml
+        source ${{ github.workspace }}/.github/env/_build_image.sh
         if [[ $(grep -c "vllm:" ${docker_compose_path}) != 0 ]]; then
-          git clone https://github.com/vllm-project/vllm.git && cd vllm
-          VLLM_VER=v0.8.3
-          echo "Check out vLLM tag ${VLLM_VER}"
-          git checkout ${VLLM_VER} &> /dev/null && cd ../
+          git clone -b ${VLLM_VER} --single-branch https://github.com/vllm-project/vllm.git
         fi
         if [[ $(grep -c "vllm-gaudi:" ${docker_compose_path}) != 0 ]]; then
-          git clone https://github.com/HabanaAI/vllm-fork.git && cd vllm-fork
-          VLLM_VER=v0.6.6.post1+Gaudi-1.20.0
-          echo "Check out vLLM tag ${VLLM_VER}"
-          git checkout ${VLLM_VER} &> /dev/null && cd ../
+          git clone -b ${VLLM_FORK_VER} --single-branch https://github.com/HabanaAI/vllm-fork.git
         fi
         git clone --depth 1 --branch ${{ inputs.opea_branch }} https://github.com/opea-project/GenAIComps.git
         cd GenAIComps && git rev-parse HEAD && cd ../
.github/workflows/_gmc-e2e.yml (vendored, 3 changes)

@@ -3,7 +3,8 @@

 # This workflow will only test GMC pipeline and will not install GMC any more
 name: Single GMC E2e Test For CD Workflow Call
+permissions:
+  contents: read
 on:
   workflow_call:
     inputs:
.github/workflows/_gmc-workflow.yml (vendored, 3 changes)

@@ -2,7 +2,8 @@
 # SPDX-License-Identifier: Apache-2.0

 name: Build and deploy GMC system on call and manual
+permissions:
+  contents: read
 on:
   workflow_dispatch:
     inputs:
.github/workflows/_helm-e2e.yml (vendored, 2 changes)

@@ -137,7 +137,7 @@ jobs:
         env:
           example: ${{ inputs.example }}
         run: |
-          if [[ ! "$example" =~ ^[a-zA-Z]{1,20}$ ]] || [[ "$example" =~ \.\. ]] || [[ "$example" == -* || "$example" == *- ]]; then
+          if [[ ! "$example" =~ ^[a-zA-Z0-9]{1,20}$ ]] || [[ "$example" =~ \.\. ]] || [[ "$example" == -* || "$example" == *- ]]; then
            echo "Error: Invalid input - only lowercase alphanumeric and internal hyphens allowed"
            exit 1
          fi
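The only functional change here is the input-validation regex, which now also accepts digits in the example name. A quick local check of the new pattern (the example values below are hypothetical):

```bash
# Hypothetical inputs run against the updated validation from the workflow above.
for example in ChatQnA AudioQnA5 "Audio..QnA" "-AudioQnA"; do
  if [[ ! "$example" =~ ^[a-zA-Z0-9]{1,20}$ ]] || [[ "$example" =~ \.\. ]] || [[ "$example" == -* || "$example" == *- ]]; then
    echo "reject: $example"
  else
    echo "accept: $example"   # AudioQnA5 now passes because digits are allowed
  fi
done
```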
.github/workflows/_run-docker-compose.yml (vendored, 10 changes)

@@ -204,6 +204,10 @@ jobs:
          if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi

          echo "Cleaning up images ..."
+          df -h
+          sleep 1
+          docker system df
+          sleep 1
          if [[ "${{ inputs.hardware }}" == "xeon"* ]]; then
            docker system prune -a -f
          else
@@ -213,7 +217,13 @@ jobs:
            docker images --filter reference="opea/comps-base" -q | xargs -r docker rmi && sleep 1s
            docker system prune -f
          fi
+          sleep 5
          docker images
+          sleep 1
+          df -h
+          sleep 1
+          docker system df
+          sleep 1

      - name: Publish pipeline artifact
        if: ${{ !cancelled() }}
.github/workflows/daily-update-vllm-version.yml (vendored, new file, +94)

@@ -0,0 +1,94 @@
+# Copyright (C) 2025 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+name: Daily update vLLM & vLLM-fork version
+
+on:
+  schedule:
+    - cron: "30 22 * * *"
+  workflow_dispatch:
+
+env:
+  BRANCH_NAME: "update"
+  USER_NAME: "CICD-at-OPEA"
+  USER_EMAIL: "CICD@opea.dev"
+
+jobs:
+  freeze-tag:
+    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        include:
+          - repo: vLLM
+            repo_name: vllm-project/vllm
+            ver_name: VLLM_VER
+          - repo: vLLM-fork
+            repo_name: HabanaAI/vllm-fork
+            ver_name: VLLM_FORK_VER
+      fail-fast: false
+    permissions:
+      contents: write
+      pull-requests: write
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+          ref: ${{ github.ref }}
+
+      - name: Set up Git
+        run: |
+          git config --global user.name ${{ env.USER_NAME }}
+          git config --global user.email ${{ env.USER_EMAIL }}
+          git remote set-url origin https://${{ env.USER_NAME }}:"${{ secrets.ACTION_TOKEN }}"@github.com/${{ github.repository }}.git
+          git fetch
+
+          if git ls-remote https://github.com/${{ github.repository }}.git "refs/heads/${{ env.BRANCH_NAME }}_${{ matrix.repo }}" | grep -q "refs/heads/${{ env.BRANCH_NAME }}_${{ matrix.repo }}"; then
+            echo "branch ${{ env.BRANCH_NAME }}_${{ matrix.repo }} exists"
+            git checkout ${{ env.BRANCH_NAME }}_${{ matrix.repo }}
+          else
+            echo "branch ${{ env.BRANCH_NAME }}_${{ matrix.repo }} not exists"
+            git checkout -b ${{ env.BRANCH_NAME }}_${{ matrix.repo }}
+            git push origin ${{ env.BRANCH_NAME }}_${{ matrix.repo }}
+            echo "branch ${{ env.BRANCH_NAME }}_${{ matrix.repo }} created successfully"
+          fi
+
+      - name: Run script
+        run: |
+          latest_vllm_ver=$(curl -s "https://api.github.com/repos/${{ matrix.repo_name }}/tags" | jq '.[0].name' -)
+          latest_vllm_ver=$(echo "$latest_vllm_ver" | sed 's/"//g')
+          echo "latest_vllm_ver=${latest_vllm_ver}" >> "$GITHUB_ENV"
+          find . -type f -name "*.sh" -exec sed -i "s/${{ matrix.ver_name }}=.*/${{ matrix.ver_name }}=${latest_vllm_ver}/" {} \;
+
+      - name: Commit changes
+        run: |
+          git add .
+          if git diff-index --quiet HEAD --; then
+            echo "No changes detected, skipping commit."
+            exit 1
+          else
+            git commit -s -m "Update ${{ matrix.repo }} version to ${latest_vllm_ver}"
+            git push --set-upstream origin ${{ env.BRANCH_NAME }}_${{ matrix.repo }}
+          fi
+
+      - name: Create Pull Request
+        env:
+          GH_TOKEN: ${{ secrets.ACTION_TOKEN }}
+        run: |
+          pr_count=$(curl -H "Authorization: token ${{ secrets.ACTION_TOKEN }}" -s "https://api.github.com/repos/${{ github.repository }}/pulls?state=all&head=${{ env.USER_NAME }}:${{ env.BRANCH_NAME }}_${{ matrix.repo }}" | jq '. | length')
+          if [ $pr_count -gt 0 ]; then
+            echo "Pull Request exists"
+            pr_number=$(curl -H "Authorization: token ${{ secrets.ACTION_TOKEN }}" -s "https://api.github.com/repos/${{ github.repository }}/pulls?state=all&head=${{ env.USER_NAME }}:${{ env.BRANCH_NAME }}_${{ matrix.repo }}" | jq '.[0].number')
+            gh pr edit ${pr_number} \
+              --title "Update ${{ matrix.repo }} version to ${latest_vllm_ver}" \
+              --body "Update ${{ matrix.repo }} version to ${latest_vllm_ver}"
+            echo "Pull Request updated successfully"
+          else
+            echo "Pull Request does not exists..."
+            gh pr create \
+              -B main \
+              -H ${{ env.BRANCH_NAME }}_${{ matrix.repo }} \
+              --title "Update ${{ matrix.repo }} version to ${latest_vllm_ver}" \
+              --body "Update ${{ matrix.repo }} version to ${latest_vllm_ver}"
+            echo "Pull Request created successfully"
+          fi
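The core of this job is a tag lookup followed by an in-place sed over every shell script. A minimal local dry run of that step might look like this (repository hard-coded to the vLLM case for illustration; jq assumed available):

```bash
# Sketch of the version-bump step for vLLM only.
latest_vllm_ver=$(curl -s "https://api.github.com/repos/vllm-project/vllm/tags" | jq -r '.[0].name')
echo "Latest vLLM tag: ${latest_vllm_ver}"
# Rewrite VLLM_VER=... in every tracked shell script, e.g. .github/env/_build_image.sh.
find . -type f -name "*.sh" -exec sed -i "s/VLLM_VER=.*/VLLM_VER=${latest_vllm_ver}/" {} \;
```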
.github/workflows/dockerhub-description.yml (vendored, 2 changes)

@@ -2,6 +2,8 @@
 # SPDX-License-Identifier: Apache-2.0

 name: Update Docker Hub Description
+permissions:
+  contents: read
 on:
   schedule:
     - cron: "0 0 * * 0"
.github/workflows/manual-docker-clean.yml (vendored, 2 changes)

@@ -2,6 +2,8 @@
 # SPDX-License-Identifier: Apache-2.0

 name: Clean up container on manual event
+permissions:
+  contents: read
 on:
   workflow_dispatch:
     inputs:
.github/workflows/manual-freeze-tag.yml (vendored, 3 changes)

@@ -2,7 +2,8 @@
 # SPDX-License-Identifier: Apache-2.0

 name: Freeze OPEA images release tag
+permissions:
+  contents: read
 on:
   workflow_dispatch:
     inputs:
.github/workflows/manual-image-build.yml (vendored, 2 changes)

@@ -2,6 +2,8 @@
 # SPDX-License-Identifier: Apache-2.0

 name: Build specific images on manual event
+permissions:
+  contents: read
 on:
   workflow_dispatch:
     inputs:
@@ -2,6 +2,8 @@
 # SPDX-License-Identifier: Apache-2.0

 name: Clean up Local Registry on manual event
+permissions:
+  contents: read
 on:
   workflow_dispatch:
     inputs:
.github/workflows/mix-trellix.yml (vendored, 3 changes)

@@ -2,7 +2,8 @@
 # SPDX-License-Identifier: Apache-2.0

 name: Trellix Command Line Scanner
+permissions:
+  contents: read
 on:
   workflow_dispatch:
   schedule:
@@ -2,14 +2,15 @@
 # SPDX-License-Identifier: Apache-2.0

 name: Nightly build/publish latest docker images
+permissions:
+  contents: read
 on:
   schedule:
     - cron: "30 14 * * 1-5" # UTC time
   workflow_dispatch:

 env:
-  EXAMPLES: CodeGen,CodeTrans #${{ vars.NIGHTLY_RELEASE_EXAMPLES }}
+  EXAMPLES: ${{ vars.NIGHTLY_RELEASE_EXAMPLES }}
   TAG: "latest"
   PUBLISH_TAGS: "latest"

@@ -75,7 +76,7 @@ jobs:

   publish:
     needs: [get-build-matrix, get-image-list, build-images]
-    if: ${{ success() }}
+    if: always()
     strategy:
       matrix:
         image: ${{ fromJSON(needs.get-image-list.outputs.matrix) }}
.github/workflows/pr-chart-e2e.yml (vendored, 3 changes)

@@ -2,7 +2,8 @@
 # SPDX-License-Identifier: Apache-2.0

 name: E2E Test with Helm Charts
+permissions:
+  contents: read
 on:
   pull_request_target:
     branches: [main]
@@ -2,7 +2,8 @@
 # SPDX-License-Identifier: Apache-2.0

 name: Check Duplicated Images
+permissions:
+  contents: read
 on:
   pull_request:
     branches: [main]
.github/workflows/pr-code-scan.yml (vendored, 4 changes)

@@ -2,7 +2,9 @@
 # SPDX-License-Identifier: Apache-2.0

 name: Code Scan
+permissions:
+  contents: read
+  security-events: write
 on:
   pull_request:
     branches: [main]
.github/workflows/pr-docker-compose-e2e.yml (vendored, 3 changes)

@@ -3,6 +3,9 @@

 name: E2E test with docker compose

+permissions:
+  contents: read
+
 on:
   pull_request_target:
     branches: ["main", "*rc"]
@@ -2,7 +2,8 @@
 # SPDX-License-Identifier: Apache-2.0

 name: Compose file and dockerfile path checking
+permissions:
+  contents: read
 on:
   pull_request:
     branches: [main]
.github/workflows/pr-link-path-scan.yml (vendored, 33 changes)

@@ -3,6 +3,9 @@

 name: Check hyperlinks and relative path validity

+permissions:
+  contents: read
+
 on:
   pull_request:
     branches: [main]
@@ -23,6 +26,7 @@ jobs:
       - name: Check the Validity of Hyperlinks
         run: |
           cd ${{github.workspace}}
+          delay=15
           fail="FALSE"
           merged_commit=$(git log -1 --format='%H')
           changed_files="$(git diff --name-status --diff-filter=ARM ${{ github.event.pull_request.base.sha }} ${merged_commit} | awk '/\.md$/ {print $NF}')"
@@ -35,15 +39,20 @@ jobs:
             # echo $url_line
             url=$(echo "$url_line"|cut -d '(' -f2 | cut -d ')' -f1|sed 's/\.git$//')
             path=$(echo "$url_line"|cut -d':' -f1 | cut -d'/' -f2-)
-            response=$(curl -L -s -o /dev/null -w "%{http_code}" "$url")|| true
-            if [ "$response" -ne 200 ]; then
-              echo "**********Validation failed, try again**********"
-              response_retry=$(curl -s -o /dev/null -w "%{http_code}" "$url")
-              if [ "$response_retry" -eq 200 ]; then
-                echo "*****Retry successfully*****"
-              else
-                echo "Invalid link from ${{github.workspace}}/$path: $url"
-                fail="TRUE"
+            if [[ "$url" == "https://platform.openai.com/api-keys"* ]]; then
+              echo "Link "$url" from ${{github.workspace}}/$path needs to be verified by a real person."
+            else
+              sleep $delay
+              response=$(curl -L -s -o /dev/null -w "%{http_code}" "$url")|| true
+              if [ "$response" -ne 200 ]; then
+                echo "**********Validation failed ($response), try again**********"
+                response_retry=$(curl -s -o /dev/null -w "%{http_code}" "$url")
+                if [ "$response_retry" -eq 200 ]; then
+                  echo "*****Retry successfully*****"
+                else
+                  echo "Invalid link ($response_retry) from ${{github.workspace}}/$path: $url"
+                  fail="TRUE"
+                fi
               fi
             fi
           done
@@ -74,6 +83,7 @@ jobs:
       - name: Checking Relative Path Validity
         run: |
           cd ${{github.workspace}}
+          delay=15
           fail="FALSE"
           repo_name=${{ github.event.pull_request.head.repo.full_name }}
           branch="https://github.com/$repo_name/blob/${{ github.event.pull_request.head.ref }}"
@@ -105,14 +115,15 @@ jobs:
             if [[ "$png_line" == *#* ]]; then
               if [ -n "changed_files" ] && echo "$changed_files" | grep -q "^${refer_path}$"; then
                 url_dev=$branch$(echo "$real_path" | sed 's|.*/GenAIExamples||')$png_path
+                sleep $delay
                 response=$(curl -I -L -s -o /dev/null -w "%{http_code}" "$url_dev")
                 if [ "$response" -ne 200 ]; then
-                  echo "**********Validation failed, try again**********"
+                  echo "**********Validation failed ($response), try again**********"
                   response_retry=$(curl -s -o /dev/null -w "%{http_code}" "$url_dev")
                   if [ "$response_retry" -eq 200 ]; then
                     echo "*****Retry successfully*****"
                   else
-                    echo "Invalid path from ${{github.workspace}}/$refer_path: $png_path"
+                    echo "Invalid path ($response_retry) from ${{github.workspace}}/$refer_path: $png_path"
                     fail="TRUE"
                   fi
                 else
.github/workflows/push-image-build.yml (vendored, 3 changes)

@@ -3,6 +3,9 @@
 # Test
 name: Build latest images on push event

+permissions:
+  contents: read
+
 on:
   push:
     branches: [ 'main' ]
@@ -3,10 +3,12 @@

 name: Check the validity of links in docker_images_list.

+permissions:
+  contents: read
+
 on:
   push:
     branches: [main]
-    types: [opened, reopened, ready_for_review, synchronize]

 jobs:
   check-dockerfile-paths:
@@ -8,6 +8,10 @@ on:
       - "**/docker_compose/**/compose*.yaml"

 name: Create an issue to GenAIInfra on push
+
+permissions:
+  contents: read
+
 jobs:
   job1:
     name: Create issue
.github/workflows/weekly-example-test.yml (vendored, 4 changes)

@@ -3,13 +3,15 @@

 name: Weekly test all examples on multiple HWs

+permissions: read-all
+
 on:
   schedule:
     - cron: "30 2 * * 6" # UTC time
   workflow_dispatch:

 env:
-  EXAMPLES: ${{ vars.NIGHTLY_RELEASE_EXAMPLES }}
+  EXAMPLES: "CodeTrans" #${{ vars.NIGHTLY_RELEASE_EXAMPLES }}
   NODES: "gaudi,xeon,rocm,arc"

 jobs:
@@ -99,7 +99,7 @@ flowchart LR

 #### First, clone the `GenAIExamples` repo.

-```
+```bash
 export WORKDIR=<your-work-directory>
 cd $WORKDIR
 git clone https://github.com/opea-project/GenAIExamples.git
@@ -109,7 +109,7 @@ git clone https://github.com/opea-project/GenAIExamples.git

 ##### For proxy environments only

-```
+```bash
 export http_proxy="Your_HTTP_Proxy"
 export https_proxy="Your_HTTPs_Proxy"
 # Example: no_proxy="localhost, 127.0.0.1, 192.168.1.1"
@@ -118,14 +118,24 @@ export no_proxy="Your_No_Proxy"

 ##### For using open-source llms

-```
+Set up a [HuggingFace](https://huggingface.co/) account and generate a [user access token](https://huggingface.co/docs/transformers.js/en/guides/private#step-1-generating-a-user-access-token).
+
+Then set an environment variable with the token and another for a directory to download the models:
+
+```bash
 export HUGGINGFACEHUB_API_TOKEN=<your-HF-token>
-export HF_CACHE_DIR=<directory-where-llms-are-downloaded> #so that no need to redownload every time
+export HF_CACHE_DIR=<directory-where-llms-are-downloaded> # to avoid redownloading models
 ```

-##### [Optional] OPANAI_API_KEY to use OpenAI models
+##### [Optional] OPENAI_API_KEY to use OpenAI models or Intel® AI for Enterprise Inference

-```
+To use OpenAI models, generate a key following these [instructions](https://platform.openai.com/api-keys).
+
+To use a remote server running Intel® AI for Enterprise Inference, contact the cloud service provider or owner of the on-prem machine for a key to access the desired model on the server.
+
+Then set the environment variable `OPENAI_API_KEY` with the key contents:
+
+```bash
 export OPENAI_API_KEY=<your-openai-key>
 ```

@@ -133,16 +143,18 @@ export OPENAI_API_KEY=<your-openai-key>

 ##### Gaudi

-```
+```bash
 source $WORKDIR/GenAIExamples/AgentQnA/docker_compose/intel/hpu/gaudi/set_env.sh
 ```

 ##### Xeon

-```
+```bash
 source $WORKDIR/GenAIExamples/AgentQnA/docker_compose/intel/cpu/xeon/set_env.sh
 ```

+For running
+
 ### 2. Launch the multi-agent system. </br>

 We make it convenient to launch the whole system with docker compose, which includes microservices for LLM, agents, UI, retrieval tool, vector database, dataprep, and telemetry. There are 3 docker compose files, which make it easy for users to pick and choose. Users can choose a different retrieval tool other than the `DocIndexRetriever` example provided in our GenAIExamples repo. Users can choose not to launch the telemetry containers.
@@ -184,14 +196,37 @@ docker compose -f $WORKDIR/GenAIExamples/DocIndexRetriever/docker_compose/intel/

 #### Launch on Xeon

-On Xeon, only OpenAI models are supported. The command below will launch the multi-agent system with the `DocIndexRetriever` as the retrieval tool for the Worker RAG agent.
+On Xeon, OpenAI models and models deployed on a remote server are supported. Both methods require an API key.

 ```bash
 export OPENAI_API_KEY=<your-openai-key>
 cd $WORKDIR/GenAIExamples/AgentQnA/docker_compose/intel/cpu/xeon
+```
+
+##### OpenAI Models
+
+The command below will launch the multi-agent system with the `DocIndexRetriever` as the retrieval tool for the Worker RAG agent.
+
+```bash
 docker compose -f $WORKDIR/GenAIExamples/DocIndexRetriever/docker_compose/intel/cpu/xeon/compose.yaml -f compose_openai.yaml up -d
 ```

+##### Models on Remote Server
+
+When models are deployed on a remote server with Intel® AI for Enterprise Inference, a base URL and an API key are required to access them. To run the Agent microservice on Xeon while using models deployed on a remote server, add `compose_remote.yaml` to the `docker compose` command and set additional environment variables.
+
+###### Notes
+
+- `OPENAI_API_KEY` is already set in a previous step.
+- `model` is used to overwrite the value set for this environment variable in `set_env.sh`.
+- `LLM_ENDPOINT_URL` is the base URL given from the owner of the on-prem machine or cloud service provider. It will follow this format: "https://<DNS>". Here is an example: "https://api.inference.example.com".
+
+```bash
+export model=<name-of-model-card>
+export LLM_ENDPOINT_URL=<http-endpoint-of-remote-server>
+docker compose -f $WORKDIR/GenAIExamples/DocIndexRetriever/docker_compose/intel/cpu/xeon/compose.yaml -f compose_openai.yaml -f compose_remote.yaml up -d
+```
+
 ### 3. Ingest Data into the vector database

 The `run_ingest_data.sh` script will use an example jsonl file to ingest example documents into a vector database. Other ways to ingest data and other types of documents supported can be found in the OPEA dataprep microservice located in the opea-project/GenAIComps repo.
@@ -208,12 +243,18 @@ bash run_ingest_data.sh
 The UI microservice is launched in the previous step with the other microservices.
 To see the UI, open a web browser to `http://${ip_address}:5173` to access the UI. Note the `ip_address` here is the host IP of the UI microservice.

-1. `create Admin Account` with a random value
-2. add opea agent endpoint `http://$ip_address:9090/v1` which is a openai compatible api
+1. Click on the arrow above `Get started`. Create an admin account with a name, email, and password.
+2. Add an OpenAI-compatible API endpoint. In the upper right, click on the circle button with the user's initial, go to `Admin Settings`->`Connections`. Under `Manage OpenAI API Connections`, click on the `+` to add a connection. Fill in these fields:
+
+   - **URL**: `http://${ip_address}:9090/v1`, do not forget the `v1`
+   - **Key**: any value
+   - **Model IDs**: any name i.e. `opea-agent`, then press `+` to add it
+
+   Click "Save".



-3. test opea agent with ui
+3. Test OPEA agent with UI. Return to `New Chat` and ensure the model (i.e. `opea-agent`) is selected near the upper left. Enter in any prompt to interact with the agent.

AgentQnA/docker_compose/intel/cpu/xeon/compose_remote.yaml (new file, +18)

@@ -0,0 +1,18 @@
+# Copyright (C) 2025 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+services:
+  worker-rag-agent:
+    environment:
+      llm_endpoint_url: ${LLM_ENDPOINT_URL}
+      api_key: ${OPENAI_API_KEY}
+
+  worker-sql-agent:
+    environment:
+      llm_endpoint_url: ${LLM_ENDPOINT_URL}
+      api_key: ${OPENAI_API_KEY}
+
+  supervisor-react-agent:
+    environment:
+      llm_endpoint_url: ${LLM_ENDPOINT_URL}
+      api_key: ${OPENAI_API_KEY}
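This overlay only injects the remote endpoint URL and API key into the three agent services; it is applied on top of the OpenAI compose file, as the updated README shows:

```bash
# From the updated README: point the agents at a remote OpenAI-compatible endpoint.
export OPENAI_API_KEY=<your-openai-key>
export model=<name-of-model-card>
export LLM_ENDPOINT_URL=<http-endpoint-of-remote-server>
docker compose -f $WORKDIR/GenAIExamples/DocIndexRetriever/docker_compose/intel/cpu/xeon/compose.yaml \
  -f compose_openai.yaml -f compose_remote.yaml up -d
```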
@@ -37,8 +37,8 @@ function build_agent_docker_image_gaudi_vllm() {
     get_genai_comps

     git clone https://github.com/HabanaAI/vllm-fork.git && cd vllm-fork
-    VLLM_VER=v0.6.6.post1+Gaudi-1.20.0
-    git checkout ${VLLM_VER} &> /dev/null && cd ../
+    VLLM_FORK_VER=v0.6.6.post1+Gaudi-1.20.0
+    git checkout ${VLLM_FORK_VER} &> /dev/null && cd ../

     echo "Build agent image with --no-cache..."
     service_list="agent agent-ui vllm-gaudi"
@@ -6,8 +6,8 @@
 # export host_ip=<your External Public IP> # export host_ip=$(hostname -I | awk '{print $1}')

-export host_ip="192.165.1.21"
-export HUGGINGFACEHUB_API_TOKEN=${YOUR_HUGGINGFACEHUB_API_TOKEN}
+export host_ip=${ip_address}
+export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
 # <token>

 export LLM_MODEL_ID=Intel/neural-chat-7b-v3-3
@@ -6,8 +6,8 @@
 # export host_ip=<your External Public IP> # export host_ip=$(hostname -I | awk '{print $1}')

-export host_ip=""
-export external_host_ip=""
+export host_ip=${ip_address}
+export external_host_ip=${ip_address}
 export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
 export HF_CACHE_DIR="./data"
 export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
@@ -14,7 +14,8 @@ export MEGA_SERVICE_HOST_IP=${host_ip}
 export WHISPER_SERVER_HOST_IP=${host_ip}
 export SPEECHT5_SERVER_HOST_IP=${host_ip}
 export LLM_SERVER_HOST_IP=${host_ip}
+export GPT_SOVITS_SERVER_HOST_IP=${host_ip}
+export GPT_SOVITS_SERVER_PORT=9880
 export WHISPER_SERVER_PORT=7066
 export SPEECHT5_SERVER_PORT=7055
 export LLM_SERVER_PORT=3006
AudioQnA/tests/README.md (new file, +45)

@@ -0,0 +1,45 @@
+# AudioQnA E2E test scripts
+
+## Set the required environment variable
+
+```bash
+export HUGGINGFACEHUB_API_TOKEN="Your_Huggingface_API_Token"
+```
+
+## Run test
+
+On Intel Xeon with TGI:
+
+```bash
+bash test_compose_tgi_on_xeon.sh
+```
+
+On Intel Xeon with vLLM:
+
+```bash
+bash test_compose_on_xeon.sh
+```
+
+On Intel Gaudi with TGI:
+
+```bash
+bash test_compose_tgi_on_gaudi.sh
+```
+
+On Intel Gaudi with vLLM:
+
+```bash
+bash test_compose_on_gaudi.sh
+```
+
+On AMD ROCm with TGI:
+
+```bash
+bash test_compose_on_rocm.sh
+```
+
+On AMD ROCm with vLLM:
+
+```bash
+bash test_compose_vllm_on_rocm.sh
+```
@@ -40,21 +40,8 @@ function build_docker_images() {

 function start_services() {
     cd $WORKPATH/docker_compose/intel/cpu/xeon/
-    export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
-    export LLM_MODEL_ID=meta-llama/Meta-Llama-3-8B-Instruct
-
-    export MEGA_SERVICE_HOST_IP=${ip_address}
-    export WHISPER_SERVER_HOST_IP=${ip_address}
-    export GPT_SOVITS_SERVER_HOST_IP=${ip_address}
-    export LLM_SERVER_HOST_IP=${ip_address}
-
-    export WHISPER_SERVER_PORT=7066
-    export GPT_SOVITS_SERVER_PORT=9880
-    export LLM_SERVER_PORT=3006
-
-    export BACKEND_SERVICE_ENDPOINT=http://${ip_address}:3008/v1/audioqna
     export host_ip=${ip_address}
+    source set_env.sh
     # sed -i "s/backend_address/$ip_address/g" $WORKPATH/ui/svelte/.env

     # Start Docker Containers
@@ -27,9 +27,9 @@ function build_docker_images() {

     git clone https://github.com/HabanaAI/vllm-fork.git
     cd vllm-fork/
-    VLLM_VER=v0.6.6.post1+Gaudi-1.20.0
-    echo "Check out vLLM tag ${VLLM_VER}"
-    git checkout ${VLLM_VER} &> /dev/null && cd ../
+    VLLM_FORK_VER=v0.6.6.post1+Gaudi-1.20.0
+    echo "Check out vLLM tag ${VLLM_FORK_VER}"
+    git checkout ${VLLM_FORK_VER} &> /dev/null && cd ../

     echo "Build all the images with --no-cache, check docker_image_build.log for details..."
     service_list="audioqna audioqna-ui whisper-gaudi speecht5-gaudi vllm-gaudi"
@@ -40,24 +40,8 @@ function build_docker_images() {

 function start_services() {
     cd $WORKPATH/docker_compose/intel/hpu/gaudi
-    export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
-    export LLM_MODEL_ID=meta-llama/Meta-Llama-3-8B-Instruct
-    export NUM_CARDS=1
-    export BLOCK_SIZE=128
-    export MAX_NUM_SEQS=256
-    export MAX_SEQ_LEN_TO_CAPTURE=2048
-
-    export MEGA_SERVICE_HOST_IP=${ip_address}
-    export WHISPER_SERVER_HOST_IP=${ip_address}
-    export SPEECHT5_SERVER_HOST_IP=${ip_address}
-    export LLM_SERVER_HOST_IP=${ip_address}
-
-    export WHISPER_SERVER_PORT=7066
-    export SPEECHT5_SERVER_PORT=7055
-    export LLM_SERVER_PORT=3006
-
-    export BACKEND_SERVICE_ENDPOINT=http://${ip_address}:3008/v1/audioqna
     export host_ip=${ip_address}
+    source set_env.sh
     # sed -i "s/backend_address/$ip_address/g" $WORKPATH/ui/svelte/.env

     # Start Docker Containers
@@ -35,20 +35,7 @@ function build_docker_images() {

 function start_services() {
     cd $WORKPATH/docker_compose/amd/gpu/rocm/
-    export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
-    export LLM_MODEL_ID=Intel/neural-chat-7b-v3-3
-
-    export MEGA_SERVICE_HOST_IP=${ip_address}
-    export WHISPER_SERVER_HOST_IP=${ip_address}
-    export SPEECHT5_SERVER_HOST_IP=${ip_address}
-    export LLM_SERVER_HOST_IP=${ip_address}
-
-    export WHISPER_SERVER_PORT=7066
-    export SPEECHT5_SERVER_PORT=7055
-    export LLM_SERVER_PORT=3006
-
-    export BACKEND_SERVICE_ENDPOINT=http://${ip_address}:3008/v1/audioqna
-
+    source set_env.sh
     # Start Docker Containers
     docker compose up -d > ${LOG_PATH}/start_services_with_compose.log
     n=0
@@ -40,21 +40,8 @@ function build_docker_images() {

 function start_services() {
     cd $WORKPATH/docker_compose/intel/cpu/xeon/
-    export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
-    export LLM_MODEL_ID=meta-llama/Meta-Llama-3-8B-Instruct
-
-    export MEGA_SERVICE_HOST_IP=${ip_address}
-    export WHISPER_SERVER_HOST_IP=${ip_address}
-    export SPEECHT5_SERVER_HOST_IP=${ip_address}
-    export LLM_SERVER_HOST_IP=${ip_address}
-
-    export WHISPER_SERVER_PORT=7066
-    export SPEECHT5_SERVER_PORT=7055
-    export LLM_SERVER_PORT=3006
-
-    export BACKEND_SERVICE_ENDPOINT=http://${ip_address}:3008/v1/audioqna
     export host_ip=${ip_address}
+    source set_env.sh
     # sed -i "s/backend_address/$ip_address/g" $WORKPATH/ui/svelte/.env

     # Start Docker Containers
@@ -34,21 +34,8 @@ function build_docker_images() {

 function start_services() {
     cd $WORKPATH/docker_compose/intel/hpu/gaudi
-    export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
-    export LLM_MODEL_ID=meta-llama/Meta-Llama-3-8B-Instruct
-
-    export MEGA_SERVICE_HOST_IP=${ip_address}
-    export WHISPER_SERVER_HOST_IP=${ip_address}
-    export SPEECHT5_SERVER_HOST_IP=${ip_address}
-    export LLM_SERVER_HOST_IP=${ip_address}
-
-    export WHISPER_SERVER_PORT=7066
-    export SPEECHT5_SERVER_PORT=7055
-    export LLM_SERVER_PORT=3006
-
-    export BACKEND_SERVICE_ENDPOINT=http://${ip_address}:3008/v1/audioqna
     export host_ip=${ip_address}
+    source set_env.sh
     # Start Docker Containers
     docker compose -f compose_tgi.yaml up -d > ${LOG_PATH}/start_services_with_compose.log
     n=0
@@ -34,21 +34,8 @@ function build_docker_images() {

 function start_services() {
     cd $WORKPATH/docker_compose/intel/cpu/xeon/
-    export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
-    export LLM_MODEL_ID=meta-llama/Meta-Llama-3-8B-Instruct
-
-    export MEGA_SERVICE_HOST_IP=${ip_address}
-    export WHISPER_SERVER_HOST_IP=${ip_address}
-    export SPEECHT5_SERVER_HOST_IP=${ip_address}
-    export LLM_SERVER_HOST_IP=${ip_address}
-
-    export WHISPER_SERVER_PORT=7066
-    export SPEECHT5_SERVER_PORT=7055
-    export LLM_SERVER_PORT=3006
-
-    export BACKEND_SERVICE_ENDPOINT=http://${ip_address}:3008/v1/audioqna
     export host_ip=${ip_address}
+    source set_env.sh
     # Start Docker Containers
     docker compose -f compose_tgi.yaml up -d > ${LOG_PATH}/start_services_with_compose.log
     n=0
@@ -33,27 +33,7 @@ function build_docker_images() {

 function start_services() {
     cd $WORKPATH/docker_compose/amd/gpu/rocm/
+    source set_env_vllm.sh
-    export host_ip=${ip_address}
-    export external_host_ip=${ip_address}
-    export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
-    export HF_CACHE_DIR="./data"
-    export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
-    export VLLM_SERVICE_PORT="8081"
-
-    export MEGA_SERVICE_HOST_IP=${host_ip}
-    export WHISPER_SERVER_HOST_IP=${host_ip}
-    export SPEECHT5_SERVER_HOST_IP=${host_ip}
-    export LLM_SERVER_HOST_IP=${host_ip}
-
-    export WHISPER_SERVER_PORT=7066
-    export SPEECHT5_SERVER_PORT=7055
-    export LLM_SERVER_PORT=${VLLM_SERVICE_PORT}
-    export BACKEND_SERVICE_PORT=3008
-    export FRONTEND_SERVICE_PORT=5173
-
-    export BACKEND_SERVICE_ENDPOINT=http://${external_host_ip}:${BACKEND_SERVICE_PORT}/v1/audioqna
-
     sed -i "s/backend_address/$ip_address/g" $WORKPATH/ui/svelte/.env

     # Start Docker Containers
@@ -1,8 +1,9 @@
 # Copyright (C) 2024 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0

+ARG IMAGE_REPO=opea
 ARG BASE_TAG=latest
-FROM opea/comps-base:$BASE_TAG
+FROM $IMAGE_REPO/comps-base:$BASE_TAG

 COPY ./avatarchatbot.py $HOME/avatarchatbot.py

@@ -41,7 +41,7 @@ export FACE="/home/user/comps/animation/src/assets/img/avatar5.png"
 # export AUDIO='assets/audio/eg3_ref.wav' # audio file path is optional, will use base64str in the post request as input if is 'None'
 export AUDIO='None'
 export FACESIZE=96
-export OUTFILE="/outputs/result.mp4"
+export OUTFILE="./outputs/result.mp4"
 export GFPGAN_MODEL_VERSION=1.4 # latest version, can roll back to v1.3 if needed
 export UPSCALE_FACTOR=1
-export FPS=10
+export FPS=5
@@ -5,3 +5,32 @@
 pushd "../../../../../" > /dev/null
 source .set_env.sh
 popd > /dev/null
+
+export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
+export host_ip=$(hostname -I | awk '{print $1}')
+export LLM_MODEL_ID=Intel/neural-chat-7b-v3-3
+export WAV2LIP_ENDPOINT=http://$host_ip:7860
+export MEGA_SERVICE_HOST_IP=${host_ip}
+export WHISPER_SERVER_HOST_IP=${host_ip}
+export WHISPER_SERVER_PORT=7066
+export SPEECHT5_SERVER_HOST_IP=${host_ip}
+export SPEECHT5_SERVER_PORT=7055
+export LLM_SERVER_HOST_IP=${host_ip}
+export LLM_SERVER_PORT=3006
+export ANIMATION_SERVICE_HOST_IP=${host_ip}
+export ANIMATION_SERVICE_PORT=3008
+
+export MEGA_SERVICE_PORT=8888
+
+export DEVICE="cpu"
+export WAV2LIP_PORT=7860
+export INFERENCE_MODE='wav2lip+gfpgan'
+export CHECKPOINT_PATH='/usr/local/lib/python3.11/site-packages/Wav2Lip/checkpoints/wav2lip_gan.pth'
+export FACE="/home/user/comps/animation/src/assets/img/avatar5.png"
+# export AUDIO='assets/audio/eg3_ref.wav' # audio file path is optional, will use base64str in the post request as input if is 'None'
+export AUDIO='None'
+export FACESIZE=96
+export OUTFILE="/outputs/result.mp4"
+export GFPGAN_MODEL_VERSION=1.4 # latest version, can roll back to v1.3 if needed
+export UPSCALE_FACTOR=1
+export FPS=10
@@ -5,3 +5,35 @@
 pushd "../../../../../" > /dev/null
 source .set_env.sh
 popd > /dev/null
+
+export HUGGINGFACEHUB_API_TOKEN=$HUGGINGFACEHUB_API_TOKEN
+export host_ip=$(hostname -I | awk '{print $1}')
+
+export LLM_MODEL_ID=Intel/neural-chat-7b-v3-3
+
+export WAV2LIP_ENDPOINT=http://$host_ip:7860
+
+export MEGA_SERVICE_HOST_IP=${host_ip}
+export WHISPER_SERVER_HOST_IP=${host_ip}
+export WHISPER_SERVER_PORT=7066
+export SPEECHT5_SERVER_HOST_IP=${host_ip}
+export SPEECHT5_SERVER_PORT=7055
+export LLM_SERVER_HOST_IP=${host_ip}
+export LLM_SERVER_PORT=3006
+export ANIMATION_SERVICE_HOST_IP=${host_ip}
+export ANIMATION_SERVICE_PORT=3008
+
+export MEGA_SERVICE_PORT=8888
+
+export DEVICE="hpu"
+export WAV2LIP_PORT=7860
+export INFERENCE_MODE='wav2lip+gfpgan'
+export CHECKPOINT_PATH='/usr/local/lib/python3.10/dist-packages/Wav2Lip/checkpoints/wav2lip_gan.pth'
+export FACE="/home/user/comps/animation/src/assets/img/avatar1.jpg"
+# export AUDIO='assets/audio/eg3_ref.wav' # audio file path is optional, will use base64str in the post request as input if is 'None'
+export AUDIO='None'
+export FACESIZE=96
+export OUTFILE="/outputs/result.mp4"
+export GFPGAN_MODEL_VERSION=1.4 # latest version, can roll back to v1.3 if needed
+export UPSCALE_FACTOR=1
+export FPS=10
@@ -5,6 +5,8 @@ services:
   avatarchatbot:
     build:
       args:
+        IMAGE_REPO: ${REGISTRY:-opea}
+        BASE_TAG: ${TAG:-latest}
         http_proxy: ${http_proxy}
         https_proxy: ${https_proxy}
         no_proxy: ${no_proxy}
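With these build args wired through, the base image in the AvatarChatbot Dockerfile follows whatever registry and tag the environment provides instead of being fixed to `opea/comps-base:latest`. A hedged local example (the REGISTRY value is illustrative):

```bash
# Compose substitutes REGISTRY/TAG into the IMAGE_REPO/BASE_TAG build args,
# so the Dockerfile's FROM resolves to ${REGISTRY}/comps-base:${TAG}.
export REGISTRY=my-registry.example.com/opea   # illustrative value
export TAG=latest
docker compose build avatarchatbot
```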
AvatarChatbot/tests/README.md (new file, +27)

@@ -0,0 +1,27 @@
+# AvatarChatbot E2E test scripts
+
+## Set the required environment variable
+
+```bash
+export HUGGINGFACEHUB_API_TOKEN="Your_Huggingface_API_Token"
+```
+
+## Run test
+
+On Intel Xeon with TGI:
+
+```bash
+bash test_compose_on_xeon.sh
+```
+
+On Intel Gaudi with TGI:
+
+```bash
+bash test_compose_on_gaudi.sh
+```
+
+On AMD ROCm with TGI:
+
+```bash
+bash test_compose_on_rocm.sh
+```
@@ -24,19 +24,13 @@ ip_address=$(hostname -I | awk '{print $1}')

 function build_docker_images() {
     opea_branch=${opea_branch:-"main"}
-    # If the opea_branch isn't main, replace the git clone branch in Dockerfile.
-    if [[ "${opea_branch}" != "main" ]]; then
-        cd $WORKPATH
-        OLD_STRING="RUN git clone --depth 1 https://github.com/opea-project/GenAIComps.git"
-        NEW_STRING="RUN git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git"
-        find . -type f -name "Dockerfile*" | while read -r file; do
-            echo "Processing file: $file"
-            sed -i "s|$OLD_STRING|$NEW_STRING|g" "$file"
-        done
-    fi
-
     cd $WORKPATH/docker_image_build
     git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git
+    pushd GenAIComps
+    echo "GenAIComps test commit is $(git rev-parse HEAD)"
+    docker build --no-cache -t ${REGISTRY}/comps-base:${TAG} --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile .
+    popd && sleep 1s

     echo "Build all the images with --no-cache, check docker_image_build.log for details..."
     service_list="avatarchatbot whisper-gaudi speecht5-gaudi wav2lip-gaudi animation"
@@ -51,37 +45,7 @@ function build_docker_images() {
 function start_services() {
     cd $WORKPATH/docker_compose/intel/hpu/gaudi

-    export HUGGINGFACEHUB_API_TOKEN=$HUGGINGFACEHUB_API_TOKEN
-    export host_ip=$(hostname -I | awk '{print $1}')
-
-    export LLM_MODEL_ID=Intel/neural-chat-7b-v3-3
-
-    export WAV2LIP_ENDPOINT=http://$host_ip:7860
-
-    export MEGA_SERVICE_HOST_IP=${host_ip}
-    export WHISPER_SERVER_HOST_IP=${host_ip}
-    export WHISPER_SERVER_PORT=7066
-    export SPEECHT5_SERVER_HOST_IP=${host_ip}
-    export SPEECHT5_SERVER_PORT=7055
-    export LLM_SERVER_HOST_IP=${host_ip}
-    export LLM_SERVER_PORT=3006
-    export ANIMATION_SERVICE_HOST_IP=${host_ip}
-    export ANIMATION_SERVICE_PORT=3008
-
-    export MEGA_SERVICE_PORT=8888
-
-    export DEVICE="hpu"
-    export WAV2LIP_PORT=7860
-    export INFERENCE_MODE='wav2lip+gfpgan'
-    export CHECKPOINT_PATH='/usr/local/lib/python3.10/dist-packages/Wav2Lip/checkpoints/wav2lip_gan.pth'
-    export FACE="/home/user/comps/animation/src/assets/img/avatar1.jpg"
-    # export AUDIO='assets/audio/eg3_ref.wav' # audio file path is optional, will use base64str in the post request as input if is 'None'
-    export AUDIO='None'
-    export FACESIZE=96
-    export OUTFILE="/outputs/result.mp4"
-    export GFPGAN_MODEL_VERSION=1.4 # latest version, can roll back to v1.3 if needed
-    export UPSCALE_FACTOR=1
-    export FPS=10
-
+    source set_env.sh
     # Start Docker Containers
     docker compose up -d > ${LOG_PATH}/start_services_with_compose.log
@@ -128,19 +92,29 @@ function stop_docker() {


 function main() {
+    echo "::group::stop_docker"
     stop_docker
-    echo y | docker builder prune --all
-    echo y | docker image prune
+    echo "::endgroup::"
+    docker builder prune --all -f
+    docker image prune -f
+
+    echo "::group::build_docker_images"
     if [[ "$IMAGE_REPO" == "opea" ]]; then build_docker_images; fi
-    start_services
-    # validate_microservices
-    validate_megaservice
-    # validate_frontend
+    echo "::endgroup::"
+
+    echo "::group::start_services"
+    start_services
+    echo "::endgroup::"
+
+    echo "::group::validate_megaservice"
+    validate_megaservice
+    echo "::endgroup::"
+
+    echo "::group::stop_docker"
     stop_docker
-    echo y | docker builder prune --all
-    echo y | docker image prune
+    echo "::endgroup::"
+    docker builder prune --all -f
+    docker image prune -f

 }

@@ -2,7 +2,7 @@
|
|||||||
# Copyright (C) 2024 Intel Corporation
|
# Copyright (C) 2024 Intel Corporation
|
||||||
# SPDX-License-Identifier: Apache-2.0
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
set -e
|
set -xe
|
||||||
IMAGE_REPO=${IMAGE_REPO:-"opea"}
|
IMAGE_REPO=${IMAGE_REPO:-"opea"}
|
||||||
IMAGE_TAG=${IMAGE_TAG:-"latest"}
|
IMAGE_TAG=${IMAGE_TAG:-"latest"}
|
||||||
echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}"
|
echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}"
|
||||||
@@ -25,6 +25,10 @@ ip_address=$(hostname -I | awk '{print $1}')
|
|||||||
function build_docker_images() {
|
function build_docker_images() {
|
||||||
cd $WORKPATH/docker_image_build
|
cd $WORKPATH/docker_image_build
|
||||||
git clone https://github.com/opea-project/GenAIComps.git && cd GenAIComps && git checkout "${opea_branch:-"main"}" && cd ../
|
git clone https://github.com/opea-project/GenAIComps.git && cd GenAIComps && git checkout "${opea_branch:-"main"}" && cd ../
|
||||||
|
pushd GenAIComps
|
||||||
|
echo "GenAIComps test commit is $(git rev-parse HEAD)"
|
||||||
|
docker build --no-cache -t ${REGISTRY}/comps-base:${TAG} --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile .
|
||||||
|
popd && sleep 1s
|
||||||
|
|
||||||
echo "Build all the images with --no-cache, check docker_image_build.log for details..."
|
echo "Build all the images with --no-cache, check docker_image_build.log for details..."
|
||||||
service_list="avatarchatbot whisper asr speecht5 tts wav2lip animation"
|
service_list="avatarchatbot whisper asr speecht5 tts wav2lip animation"
|
||||||
@@ -38,48 +42,8 @@ function build_docker_images() {
|
|||||||
|
|
||||||
function start_services() {
|
function start_services() {
|
||||||
cd $WORKPATH/docker_compose/amd/gpu/rocm
|
cd $WORKPATH/docker_compose/amd/gpu/rocm
|
||||||
|
|
||||||
export HUGGINGFACEHUB_API_TOKEN=$HUGGINGFACEHUB_API_TOKEN
|
|
||||||
export OPENAI_API_KEY=$OPENAI_API_KEY
|
export OPENAI_API_KEY=$OPENAI_API_KEY
|
||||||
export host_ip=${ip_address}
|
source set_env.sh
|
||||||
|
|
||||||
export TGI_SERVICE_PORT=3006
|
|
||||||
export TGI_LLM_ENDPOINT=http://${host_ip}:${TGI_SERVICE_PORT}
|
|
||||||
export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
|
|
||||||
|
|
||||||
export ASR_ENDPOINT=http://${host_ip}:7066
|
|
||||||
export TTS_ENDPOINT=http://${host_ip}:7055
|
|
||||||
export WAV2LIP_ENDPOINT=http://${host_ip}:7860
|
|
||||||
|
|
||||||
export MEGA_SERVICE_HOST_IP=${host_ip}
|
|
||||||
export ASR_SERVICE_HOST_IP=${host_ip}
|
|
||||||
export TTS_SERVICE_HOST_IP=${host_ip}
|
|
||||||
export LLM_SERVICE_HOST_IP=${host_ip}
|
|
||||||
export ANIMATION_SERVICE_HOST_IP=${host_ip}
|
|
||||||
export WHISPER_SERVER_HOST_IP=${host_ip}
|
|
||||||
export WHISPER_SERVER_PORT=7066
|
|
||||||
|
|
||||||
export SPEECHT5_SERVER_HOST_IP=${host_ip}
|
|
||||||
export SPEECHT5_SERVER_PORT=7055
|
|
||||||
|
|
||||||
export MEGA_SERVICE_PORT=8888
|
|
||||||
export ASR_SERVICE_PORT=3001
|
|
||||||
export TTS_SERVICE_PORT=3002
|
|
||||||
export LLM_SERVICE_PORT=3006
|
|
||||||
export ANIMATION_SERVICE_PORT=3008
|
|
||||||
|
|
||||||
export DEVICE="cpu"
|
|
||||||
export WAV2LIP_PORT=7860
|
|
||||||
export INFERENCE_MODE='wav2lip+gfpgan'
|
|
||||||
export CHECKPOINT_PATH='/usr/local/lib/python3.11/site-packages/Wav2Lip/checkpoints/wav2lip_gan.pth'
|
|
||||||
export FACE="/home/user/comps/animation/src/assets/img/avatar5.png"
|
|
||||||
# export AUDIO='assets/audio/eg3_ref.wav' # audio file path is optional, will use base64str in the post request as input if is 'None'
|
|
||||||
export AUDIO='None'
|
|
||||||
export FACESIZE=96
|
|
||||||
export OUTFILE="./outputs/result.mp4"
|
|
||||||
export GFPGAN_MODEL_VERSION=1.4 # latest version, can roll back to v1.3 if needed
|
|
||||||
export UPSCALE_FACTOR=1
|
|
||||||
export FPS=5
|
|
||||||
|
|
||||||
# Start Docker Containers
|
# Start Docker Containers
|
||||||
docker compose up -d --force-recreate
|
docker compose up -d --force-recreate
|
||||||
@@ -138,11 +102,6 @@ function validate_megaservice() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
#function validate_frontend() {
|
|
||||||
|
|
||||||
#}
|
|
||||||
|
|
||||||
|
|
||||||
function stop_docker() {
|
function stop_docker() {
|
||||||
cd $WORKPATH/docker_compose/amd/gpu/rocm
|
cd $WORKPATH/docker_compose/amd/gpu/rocm
|
||||||
docker compose down && docker compose rm -f
|
docker compose down && docker compose rm -f
|
||||||
@@ -151,19 +110,27 @@ function stop_docker() {
|
|||||||
|
|
||||||
function main() {
|
function main() {
|
||||||
|
|
||||||
echo $OPENAI_API_KEY
|
echo "::group::stop_docker"
|
||||||
echo $OPENAI_KEY
|
|
||||||
|
|
||||||
stop_docker
|
stop_docker
|
||||||
|
echo "::endgroup::"
|
||||||
|
|
||||||
|
echo "::group::build_docker_images"
|
||||||
if [[ "$IMAGE_REPO" == "opea" ]]; then build_docker_images; fi
|
if [[ "$IMAGE_REPO" == "opea" ]]; then build_docker_images; fi
|
||||||
start_services
|
echo "::endgroup::"
|
||||||
# validate_microservices
|
|
||||||
sleep 30
|
|
||||||
validate_megaservice
|
|
||||||
# validate_frontend
|
|
||||||
stop_docker
|
|
||||||
|
|
||||||
echo y | docker system prune
|
echo "::group::start_services"
|
||||||
|
start_services
|
||||||
|
echo "::endgroup::"
|
||||||
|
|
||||||
|
echo "::group::validate_megaservice"
|
||||||
|
validate_megaservice
|
||||||
|
echo "::endgroup::"
|
||||||
|
|
||||||
|
echo "::group::stop_docker"
|
||||||
|
stop_docker
|
||||||
|
echo "::endgroup::"
|
||||||
|
|
||||||
|
docker system prune -f
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -24,19 +24,13 @@ ip_address=$(hostname -I | awk '{print $1}')
|
|||||||
|
|
||||||
function build_docker_images() {
|
function build_docker_images() {
|
||||||
opea_branch=${opea_branch:-"main"}
|
opea_branch=${opea_branch:-"main"}
|
||||||
# If the opea_branch isn't main, replace the git clone branch in Dockerfile.
|
|
||||||
if [[ "${opea_branch}" != "main" ]]; then
|
|
||||||
cd $WORKPATH
|
|
||||||
OLD_STRING="RUN git clone --depth 1 https://github.com/opea-project/GenAIComps.git"
|
|
||||||
NEW_STRING="RUN git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git"
|
|
||||||
find . -type f -name "Dockerfile*" | while read -r file; do
|
|
||||||
echo "Processing file: $file"
|
|
||||||
sed -i "s|$OLD_STRING|$NEW_STRING|g" "$file"
|
|
||||||
done
|
|
||||||
fi
|
|
||||||
|
|
||||||
cd $WORKPATH/docker_image_build
|
cd $WORKPATH/docker_image_build
|
||||||
git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git
|
git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git
|
||||||
|
pushd GenAIComps
|
||||||
|
echo "GenAIComps test commit is $(git rev-parse HEAD)"
|
||||||
|
docker build --no-cache -t ${REGISTRY}/comps-base:${TAG} --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile .
|
||||||
|
popd && sleep 1s
|
||||||
|
|
||||||
echo "Build all the images with --no-cache, check docker_image_build.log for details..."
|
echo "Build all the images with --no-cache, check docker_image_build.log for details..."
|
||||||
service_list="avatarchatbot whisper speecht5 wav2lip animation"
|
service_list="avatarchatbot whisper speecht5 wav2lip animation"
|
||||||
@@ -51,37 +45,7 @@ function build_docker_images() {
|
|||||||
function start_services() {
|
function start_services() {
|
||||||
cd $WORKPATH/docker_compose/intel/cpu/xeon
|
cd $WORKPATH/docker_compose/intel/cpu/xeon
|
||||||
|
|
||||||
export HUGGINGFACEHUB_API_TOKEN=$HUGGINGFACEHUB_API_TOKEN
|
source set_env.sh
|
||||||
export host_ip=$(hostname -I | awk '{print $1}')
|
|
||||||
|
|
||||||
export LLM_MODEL_ID=Intel/neural-chat-7b-v3-3
|
|
||||||
|
|
||||||
export WAV2LIP_ENDPOINT=http://$host_ip:7860
|
|
||||||
|
|
||||||
export MEGA_SERVICE_HOST_IP=${host_ip}
|
|
||||||
export WHISPER_SERVER_HOST_IP=${host_ip}
|
|
||||||
export WHISPER_SERVER_PORT=7066
|
|
||||||
export SPEECHT5_SERVER_HOST_IP=${host_ip}
|
|
||||||
export SPEECHT5_SERVER_PORT=7055
|
|
||||||
export LLM_SERVER_HOST_IP=${host_ip}
|
|
||||||
export LLM_SERVER_PORT=3006
|
|
||||||
export ANIMATION_SERVICE_HOST_IP=${host_ip}
|
|
||||||
export ANIMATION_SERVICE_PORT=3008
|
|
||||||
|
|
||||||
export MEGA_SERVICE_PORT=8888
|
|
||||||
|
|
||||||
export DEVICE="cpu"
|
|
||||||
export WAV2LIP_PORT=7860
|
|
||||||
export INFERENCE_MODE='wav2lip+gfpgan'
|
|
||||||
export CHECKPOINT_PATH='/usr/local/lib/python3.11/site-packages/Wav2Lip/checkpoints/wav2lip_gan.pth'
|
|
||||||
export FACE="/home/user/comps/animation/src/assets/img/avatar5.png"
|
|
||||||
# export AUDIO='assets/audio/eg3_ref.wav' # audio file path is optional, will use base64str in the post request as input if is 'None'
|
|
||||||
export AUDIO='None'
|
|
||||||
export FACESIZE=96
|
|
||||||
export OUTFILE="/outputs/result.mp4"
|
|
||||||
export GFPGAN_MODEL_VERSION=1.4 # latest version, can roll back to v1.3 if needed
|
|
||||||
export UPSCALE_FACTOR=1
|
|
||||||
export FPS=10
|
|
||||||
|
|
||||||
# Start Docker Containers
|
# Start Docker Containers
|
||||||
docker compose up -d
|
docker compose up -d
|
||||||
@@ -127,16 +91,28 @@ function stop_docker() {
|
|||||||
|
|
||||||
|
|
||||||
function main() {
|
function main() {
|
||||||
stop_docker
|
|
||||||
if [[ "$IMAGE_REPO" == "opea" ]]; then build_docker_images; fi
|
|
||||||
start_services
|
|
||||||
# validate_microservices
|
|
||||||
validate_megaservice
|
|
||||||
# validate_frontend
|
|
||||||
stop_docker
|
|
||||||
|
|
||||||
echo y | docker builder prune --all
|
echo "::group::stop_docker"
|
||||||
echo y | docker image prune
|
stop_docker
|
||||||
|
echo "::endgroup::"
|
||||||
|
|
||||||
|
echo "::group::build_docker_images"
|
||||||
|
if [[ "$IMAGE_REPO" == "opea" ]]; then build_docker_images; fi
|
||||||
|
echo "::endgroup::"
|
||||||
|
|
||||||
|
echo "::group::start_services"
|
||||||
|
start_services
|
||||||
|
echo "::endgroup::"
|
||||||
|
|
||||||
|
echo "::group::validate_megaservice"
|
||||||
|
validate_megaservice
|
||||||
|
echo "::endgroup::"
|
||||||
|
|
||||||
|
echo "::group::stop_docker"
|
||||||
|
stop_docker
|
||||||
|
echo "::endgroup::"
|
||||||
|
|
||||||
|
docker system prune -f
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -16,7 +16,7 @@ services:
|
|||||||
- chatqna-redis-vector-db
|
- chatqna-redis-vector-db
|
||||||
- chatqna-tei-embedding-service
|
- chatqna-tei-embedding-service
|
||||||
ports:
|
ports:
|
||||||
- "${CHATQNA_REDIS_DATAPREP_PORT}:5000"
|
- "${CHATQNA_REDIS_DATAPREP_PORT:-18103}:5000"
|
||||||
environment:
|
environment:
|
||||||
no_proxy: ${no_proxy}
|
no_proxy: ${no_proxy}
|
||||||
http_proxy: ${http_proxy}
|
http_proxy: ${http_proxy}
|
||||||
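The updated mapping uses Compose's `${VAR:-default}` substitution, so the dataprep service still publishes a host port (18103) even when `CHATQNA_REDIS_DATAPREP_PORT` is not exported. A quick way to confirm the fallback is to render the effective configuration; this is only a sketch, with the compose file name assumed:

```bash
# With the variable unset, the rendered config should publish host port 18103
# for the dataprep container's internal port 5000.
unset CHATQNA_REDIS_DATAPREP_PORT
docker compose -f compose.yaml config | grep -B 2 -A 2 "5000"
```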
|
|||||||
@@ -16,7 +16,7 @@ services:
|
|||||||
- chatqna-redis-vector-db
|
- chatqna-redis-vector-db
|
||||||
- chatqna-tei-embedding-service
|
- chatqna-tei-embedding-service
|
||||||
ports:
|
ports:
|
||||||
- "${CHATQNA_REDIS_DATAPREP_PORT}:5000"
|
- "${CHATQNA_REDIS_DATAPREP_PORT:-18103}:5000"
|
||||||
environment:
|
environment:
|
||||||
no_proxy: ${no_proxy}
|
no_proxy: ${no_proxy}
|
||||||
http_proxy: ${http_proxy}
|
http_proxy: ${http_proxy}
|
||||||
|
|||||||
@@ -16,7 +16,7 @@ services:
|
|||||||
- chatqna-redis-vector-db
|
- chatqna-redis-vector-db
|
||||||
- chatqna-tei-embedding-service
|
- chatqna-tei-embedding-service
|
||||||
ports:
|
ports:
|
||||||
- "${CHATQNA_REDIS_DATAPREP_PORT}:5000"
|
- "${CHATQNA_REDIS_DATAPREP_PORT:-18103}:5000"
|
||||||
environment:
|
environment:
|
||||||
no_proxy: ${no_proxy}
|
no_proxy: ${no_proxy}
|
||||||
http_proxy: ${http_proxy}
|
http_proxy: ${http_proxy}
|
||||||
|
|||||||
@@ -16,7 +16,7 @@ services:
|
|||||||
- chatqna-redis-vector-db
|
- chatqna-redis-vector-db
|
||||||
- chatqna-tei-embedding-service
|
- chatqna-tei-embedding-service
|
||||||
ports:
|
ports:
|
||||||
- "${CHATQNA_REDIS_DATAPREP_PORT:-5000}:5000"
|
- "${CHATQNA_REDIS_DATAPREP_PORT:-18103}:5000"
|
||||||
environment:
|
environment:
|
||||||
no_proxy: ${no_proxy}
|
no_proxy: ${no_proxy}
|
||||||
http_proxy: ${http_proxy}
|
http_proxy: ${http_proxy}
|
||||||
|
|||||||
@@ -2,17 +2,17 @@
|
|||||||
|
|
||||||
# Copyright (C) 2025 Advanced Micro Devices, Inc.
|
# Copyright (C) 2025 Advanced Micro Devices, Inc.
|
||||||
|
|
||||||
export HOST_IP=''
|
export HOST_IP=${ip_address}
|
||||||
export HOST_IP_EXTERNAL=''
|
export HOST_IP_EXTERNAL=${ip_address}
|
||||||
|
|
||||||
export CHATQNA_EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
|
export CHATQNA_EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
|
||||||
export CHATQNA_HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
|
export CHATQNA_HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
|
||||||
export CHATQNA_LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct"
|
export CHATQNA_LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct"
|
||||||
export CHATQNA_RERANK_MODEL_ID="BAAI/bge-reranker-base"
|
export CHATQNA_RERANK_MODEL_ID="BAAI/bge-reranker-base"
|
||||||
|
|
||||||
export CHATQNA_BACKEND_SERVICE_PORT=18102
|
export CHATQNA_BACKEND_SERVICE_PORT=8888
|
||||||
export CHATQNA_FRONTEND_SERVICE_PORT=18101
|
export CHATQNA_FRONTEND_SERVICE_PORT=5173
|
||||||
export CHATQNA_NGINX_PORT=18104
|
export CHATQNA_NGINX_PORT=80
|
||||||
export CHATQNA_REDIS_DATAPREP_PORT=18103
|
export CHATQNA_REDIS_DATAPREP_PORT=18103
|
||||||
export CHATQNA_REDIS_RETRIEVER_PORT=7000
|
export CHATQNA_REDIS_RETRIEVER_PORT=7000
|
||||||
export CHATQNA_REDIS_VECTOR_INSIGHT_PORT=8001
|
export CHATQNA_REDIS_VECTOR_INSIGHT_PORT=8001
|
||||||
|
|||||||
@@ -2,18 +2,18 @@
|
|||||||
|
|
||||||
# Copyright (C) 2025 Advanced Micro Devices, Inc.
|
# Copyright (C) 2025 Advanced Micro Devices, Inc.
|
||||||
|
|
||||||
export HOST_IP=''
|
export HOST_IP=${ip_address}
|
||||||
export HOST_IP_EXTERNAL=''
|
export HOST_IP_EXTERNAL=${ip_address}
|
||||||
|
|
||||||
export CHATQNA_EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
|
export CHATQNA_EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
|
||||||
export CHATQNA_HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
|
export CHATQNA_HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
|
||||||
export CHATQNA_LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct"
|
export CHATQNA_LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct"
|
||||||
export CHATQNA_RERANK_MODEL_ID="BAAI/bge-reranker-base"
|
export CHATQNA_RERANK_MODEL_ID="BAAI/bge-reranker-base"
|
||||||
|
|
||||||
export CHATQNA_BACKEND_SERVICE_PORT=18102
|
export CHATQNA_BACKEND_SERVICE_PORT=8888
|
||||||
export CHATQNA_FRONTEND_SERVICE_PORT=18101
|
export CHATQNA_FRONTEND_SERVICE_PORT=5173
|
||||||
export CHATQNA_LLM_FAQGEN_PORT=18011
|
export CHATQNA_LLM_FAQGEN_PORT=18011
|
||||||
export CHATQNA_NGINX_PORT=18104
|
export CHATQNA_NGINX_PORT=80
|
||||||
export CHATQNA_REDIS_DATAPREP_PORT=18103
|
export CHATQNA_REDIS_DATAPREP_PORT=18103
|
||||||
export CHATQNA_REDIS_RETRIEVER_PORT=7000
|
export CHATQNA_REDIS_RETRIEVER_PORT=7000
|
||||||
export CHATQNA_REDIS_VECTOR_INSIGHT_PORT=8001
|
export CHATQNA_REDIS_VECTOR_INSIGHT_PORT=8001
|
||||||
|
|||||||
@@ -2,18 +2,18 @@
|
|||||||
|
|
||||||
# Copyright (C) 2025 Advanced Micro Devices, Inc.
|
# Copyright (C) 2025 Advanced Micro Devices, Inc.
|
||||||
|
|
||||||
export HOST_IP=''
|
export HOST_IP=${ip_address}
|
||||||
export HOST_IP_EXTERNAL=''
|
export HOST_IP_EXTERNAL=${ip_address}
|
||||||
|
|
||||||
export CHATQNA_EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
|
export CHATQNA_EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
|
||||||
export CHATQNA_HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
|
export CHATQNA_HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
|
||||||
export CHATQNA_LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct"
|
export CHATQNA_LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct"
|
||||||
export CHATQNA_RERANK_MODEL_ID="BAAI/bge-reranker-base"
|
export CHATQNA_RERANK_MODEL_ID="BAAI/bge-reranker-base"
|
||||||
|
|
||||||
export CHATQNA_BACKEND_SERVICE_PORT=18102
|
export CHATQNA_BACKEND_SERVICE_PORT=8888
|
||||||
export CHATQNA_FRONTEND_SERVICE_PORT=18101
|
export CHATQNA_FRONTEND_SERVICE_PORT=5173
|
||||||
export CHATQNA_LLM_FAQGEN_PORT=18011
|
export CHATQNA_LLM_FAQGEN_PORT=18011
|
||||||
export CHATQNA_NGINX_PORT=18104
|
export CHATQNA_NGINX_PORT=80
|
||||||
export CHATQNA_REDIS_DATAPREP_PORT=18103
|
export CHATQNA_REDIS_DATAPREP_PORT=18103
|
||||||
export CHATQNA_REDIS_RETRIEVER_PORT=7000
|
export CHATQNA_REDIS_RETRIEVER_PORT=7000
|
||||||
export CHATQNA_REDIS_VECTOR_INSIGHT_PORT=8001
|
export CHATQNA_REDIS_VECTOR_INSIGHT_PORT=8001
|
||||||
|
|||||||
@@ -2,17 +2,17 @@
|
|||||||
|
|
||||||
# Copyright (C) 2025 Advanced Micro Devices, Inc.
|
# Copyright (C) 2025 Advanced Micro Devices, Inc.
|
||||||
|
|
||||||
export HOST_IP=''
|
export HOST_IP=${ip_address}
|
||||||
export HOST_IP_EXTERNAL=''
|
export HOST_IP_EXTERNAL=${ip_address}
|
||||||
|
|
||||||
export CHATQNA_EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
|
export CHATQNA_EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
|
||||||
export CHATQNA_HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
|
export CHATQNA_HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
|
||||||
export CHATQNA_LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct"
|
export CHATQNA_LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct"
|
||||||
export CHATQNA_RERANK_MODEL_ID="BAAI/bge-reranker-base"
|
export CHATQNA_RERANK_MODEL_ID="BAAI/bge-reranker-base"
|
||||||
|
|
||||||
export CHATQNA_BACKEND_SERVICE_PORT=18102
|
export CHATQNA_BACKEND_SERVICE_PORT=8888
|
||||||
export CHATQNA_FRONTEND_SERVICE_PORT=18101
|
export CHATQNA_FRONTEND_SERVICE_PORT=5173
|
||||||
export CHATQNA_NGINX_PORT=18104
|
export CHATQNA_NGINX_PORT=80
|
||||||
export CHATQNA_REDIS_DATAPREP_PORT=18103
|
export CHATQNA_REDIS_DATAPREP_PORT=18103
|
||||||
export CHATQNA_REDIS_RETRIEVER_PORT=7000
|
export CHATQNA_REDIS_RETRIEVER_PORT=7000
|
||||||
export CHATQNA_REDIS_VECTOR_INSIGHT_PORT=8001
|
export CHATQNA_REDIS_VECTOR_INSIGHT_PORT=8001
|
||||||
|
|||||||
@@ -1,35 +0,0 @@
|
|||||||
#!/bin/bash
|
|
||||||
|
|
||||||
# Copyright (C) 2024 Intel Corporation
|
|
||||||
# SPDX-License-Identifier: Apache-2.0
|
|
||||||
|
|
||||||
# Update the package index
|
|
||||||
sudo apt-get -y update
|
|
||||||
|
|
||||||
# Install prerequisites
|
|
||||||
sudo apt-get -y install ca-certificates curl
|
|
||||||
|
|
||||||
# Create the directory for the Docker GPG key
|
|
||||||
sudo install -m 0755 -d /etc/apt/keyrings
|
|
||||||
|
|
||||||
# Add Docker's official GPG key
|
|
||||||
sudo curl -fsSL https://download.docker.com/linux/ubuntu/gpg -o /etc/apt/keyrings/docker.asc
|
|
||||||
|
|
||||||
# Set permissions for the GPG key
|
|
||||||
sudo chmod a+r /etc/apt/keyrings/docker.asc
|
|
||||||
|
|
||||||
# Add Docker repository to the sources list
|
|
||||||
echo "deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/docker.asc] https://download.docker.com/linux/ubuntu \
|
|
||||||
$(. /etc/os-release && echo "$VERSION_CODENAME") stable" | sudo tee /etc/apt/sources.list.d/docker.list > /dev/null
|
|
||||||
|
|
||||||
# Update the package index with Docker packages
|
|
||||||
sudo apt-get -y update
|
|
||||||
|
|
||||||
# Install Docker packages
|
|
||||||
sudo apt-get -y install docker-ce docker-ce-cli containerd.io docker-buildx-plugin docker-compose-plugin
|
|
||||||
|
|
||||||
# add existing user
|
|
||||||
sudo usermod -aG docker $USER
|
|
||||||
|
|
||||||
# Optional: Verify that Docker is installed correctly
|
|
||||||
sudo docker --version
|
|
||||||
@@ -156,6 +156,7 @@ In the context of deploying a ChatQnA pipeline on an Intel® Xeon® platform, we
|
|||||||
| [compose_faqgen_tgi.yaml](./compose_faqgen_tgi.yaml) | Enables FAQ generation using TGI as the LLM serving framework. For more details, refer to [README_faqgen.md](./README_faqgen.md). |
|
| [compose_faqgen_tgi.yaml](./compose_faqgen_tgi.yaml) | Enables FAQ generation using TGI as the LLM serving framework. For more details, refer to [README_faqgen.md](./README_faqgen.md). |
|
||||||
| [compose.telemetry.yaml](./compose.telemetry.yaml) | Helper file for telemetry features for vllm. Can be used along with any compose files that serves vllm |
|
| [compose.telemetry.yaml](./compose.telemetry.yaml) | Helper file for telemetry features for vllm. Can be used along with any compose files that serves vllm |
|
||||||
| [compose_tgi.telemetry.yaml](./compose_tgi.telemetry.yaml) | Helper file for telemetry features for tgi. Can be used along with any compose files that serves tgi |
|
| [compose_tgi.telemetry.yaml](./compose_tgi.telemetry.yaml) | Helper file for telemetry features for tgi. Can be used along with any compose files that serves tgi |
|
||||||
|
| [compose_mariadb.yaml](./compose_mariadb.yaml)                            | Uses MariaDB Server as the vector database. All other configurations remain the same as the default deployment |
|
||||||
|
|
||||||
## ChatQnA with Conversational UI (Optional)
|
## ChatQnA with Conversational UI (Optional)
|
||||||
|
|
||||||
|
|||||||
259
ChatQnA/docker_compose/intel/cpu/xeon/README_mariadb.md
Normal file
259
ChatQnA/docker_compose/intel/cpu/xeon/README_mariadb.md
Normal file
@@ -0,0 +1,259 @@
|
|||||||
|
# Deploying ChatQnA with MariaDB Vector on Intel® Xeon® Processors
|
||||||
|
|
||||||
|
This document outlines the deployment process for a ChatQnA application utilizing the [GenAIComps](https://github.com/opea-project/GenAIComps.git) microservice pipeline on Intel® Xeon® servers. The pipeline integrates **MariaDB Vector** as the vector database and includes microservices such as `embedding`, `retriever`, `rerank`, and `llm`.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Table of Contents
|
||||||
|
|
||||||
|
1. [Build Docker Images](#build-docker-images)
|
||||||
|
2. [Validate Microservices](#validate-microservices)
|
||||||
|
3. [Launch the UI](#launch-the-ui)
|
||||||
|
4. [Launch the Conversational UI (Optional)](#launch-the-conversational-ui-optional)
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Build Docker Images
|
||||||
|
|
||||||
|
First of all, you need to build the required Docker images locally.
|
||||||
|
|
||||||
|
```bash
|
||||||
|
git clone https://github.com/opea-project/GenAIComps.git
|
||||||
|
cd GenAIComps
|
||||||
|
```
|
||||||
|
|
||||||
|
### 1. Build Retriever Image
|
||||||
|
|
||||||
|
```bash
|
||||||
|
docker build --no-cache -t opea/retriever:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/retrievers/src/Dockerfile .
|
||||||
|
```
|
||||||
|
|
||||||
|
### 2. Build Dataprep Image
|
||||||
|
|
||||||
|
```bash
|
||||||
|
docker build --no-cache -t opea/dataprep:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/src/Dockerfile .
|
||||||
|
cd ..
|
||||||
|
```
|
||||||
|
|
||||||
|
### 3. Build MegaService Docker Image
|
||||||
|
|
||||||
|
To construct the MegaService, we utilize the [GenAIComps](https://github.com/opea-project/GenAIComps.git) microservice pipeline within the `chatqna.py` Python script. Build the MegaService Docker image with the command below:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
git clone https://github.com/opea-project/GenAIExamples.git
|
||||||
|
cd GenAIExamples/ChatQnA/
|
||||||
|
docker build --no-cache -t opea/chatqna:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile .
|
||||||
|
cd ../..
|
||||||
|
```
|
||||||
|
|
||||||
|
### 4. Build UI Docker Image
|
||||||
|
|
||||||
|
Build the frontend Docker image with the command below:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cd GenAIExamples/ChatQnA/ui
|
||||||
|
docker build --no-cache -t opea/chatqna-ui:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f ./docker/Dockerfile .
|
||||||
|
cd ../../..
|
||||||
|
```
|
||||||
|
|
||||||
|
### 5. Build Conversational React UI Docker Image (Optional)
|
||||||
|
|
||||||
|
Build the frontend Docker image that enables a conversational experience with the ChatQnA MegaService using the command below:
|
||||||
|
|
||||||
|
**Export the value of the public IP address of your Xeon server to the `host_ip` environment variable**
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cd GenAIExamples/ChatQnA/ui
|
||||||
|
export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8912/v1/chatqna"
|
||||||
|
export DATAPREP_SERVICE_ENDPOINT="http://${host_ip}:6043/v1/dataprep/ingest"
|
||||||
|
docker build --no-cache -t opea/chatqna-conversation-ui:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy --build-arg BACKEND_SERVICE_ENDPOINT=$BACKEND_SERVICE_ENDPOINT --build-arg DATAPREP_SERVICE_ENDPOINT=$DATAPREP_SERVICE_ENDPOINT -f ./docker/Dockerfile.react .
|
||||||
|
cd ../../..
|
||||||
|
```
|
||||||
|
|
||||||
|
### 6. Build Nginx Docker Image
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cd GenAIComps
|
||||||
|
docker build -t opea/nginx:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/third_parties/nginx/src/Dockerfile .
|
||||||
|
```
|
||||||
|
|
||||||
|
Then run the command `docker images`; you should see the following five Docker images:
|
||||||
|
|
||||||
|
1. `opea/dataprep:latest`
|
||||||
|
2. `opea/retriever:latest`
|
||||||
|
3. `opea/chatqna:latest`
|
||||||
|
4. `opea/chatqna-ui:latest`
|
||||||
|
5. `opea/nginx:latest`
|
||||||
|
|
||||||
|
## Start Microservices
|
||||||
|
|
||||||
|
### Required Models
|
||||||
|
|
||||||
|
By default, the embedding, reranking, and LLM models are set as listed below:
|
||||||
|
|
||||||
|
| Service | Model |
|
||||||
|
| --------- | ----------------------------------- |
|
||||||
|
| Embedding | BAAI/bge-base-en-v1.5 |
|
||||||
|
| Reranking | BAAI/bge-reranker-base |
|
||||||
|
| LLM | meta-llama/Meta-Llama-3-8B-Instruct |
|
||||||
|
|
||||||
|
Change the `xxx_MODEL_ID` values below to suit your needs, as shown in the example that follows.
|
||||||
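For example, to swap in a different LLM while keeping the rest of the defaults, export the corresponding variable before starting the stack; the model name below is only an illustration, not a recommendation:

```bash
# Illustrative override; any chat model supported by the serving backend works.
export LLM_MODEL_ID="meta-llama/Meta-Llama-3.1-8B-Instruct"
```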
|
|
||||||
|
### Setup Environment Variables
|
||||||
|
|
||||||
|
Since `compose_mariadb.yaml` will consume some environment variables, you need to set them up in advance as shown below.
|
||||||
|
|
||||||
|
**Export the value of the public IP address of your Xeon server to the `host_ip` environment variable**
|
||||||
|
|
||||||
|
> Replace External_Public_IP below with the actual IPv4 value
|
||||||
|
|
||||||
|
```bash
|
||||||
|
export host_ip="External_Public_IP"
|
||||||
|
```
|
||||||
|
|
||||||
|
> Replace the placeholder below with your actual Hugging Face API token
|
||||||
|
|
||||||
|
```bash
|
||||||
|
export HUGGINGFACEHUB_API_TOKEN="Your_Huggingface_API_Token"
|
||||||
|
```
|
||||||
|
|
||||||
|
**Append the value of the public IP address to the no_proxy list if you are in a proxy environment**
|
||||||
|
|
||||||
|
```bash
|
||||||
|
export no_proxy=${your_no_proxy},chatqna-xeon-ui-server,chatqna-xeon-backend-server,dataprep-mariadb-vector,tei-embedding-service,retriever,tei-reranking-service,tgi-service,vllm-service
|
||||||
|
```
|
||||||
|
|
||||||
|
```bash
|
||||||
|
export no_proxy=${your_no_proxy}
|
||||||
|
export http_proxy=${your_http_proxy}
|
||||||
|
export https_proxy=${your_http_proxy}
|
||||||
|
export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
|
||||||
|
export RERANK_MODEL_ID="BAAI/bge-reranker-base"
|
||||||
|
export LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct"
|
||||||
|
export MARIADB_DATABASE="vectordb"
|
||||||
|
export MARIADB_USER="chatqna"
|
||||||
|
export MARIADB_PASSWORD="password"
|
||||||
|
```
|
||||||
|
|
||||||
|
Note: Replace `host_ip` with your external IP address; do not use localhost.
|
||||||
|
|
||||||
|
### Start all the services Docker Containers
|
||||||
|
|
||||||
|
> Before running the docker compose command, you need to be in the folder that has the docker compose yaml file
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cd GenAIExamples/ChatQnA/docker_compose/intel/cpu/xeon/
|
||||||
|
docker compose -f compose_mariadb.yaml up -d
|
||||||
|
```
|
||||||
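Optionally, confirm that all containers are up (and that services with healthchecks report healthy) before validating the microservices; a minimal check:

```bash
docker compose -f compose_mariadb.yaml ps
```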
|
|
||||||
|
### Validate Microservices
|
||||||
|
|
||||||
|
Follow the instructions below to validate the microservices.
|
||||||
|
For details on how to verify the correctness of the response, refer to [how-to-validate_service](../../hpu/gaudi/how_to_validate_service.md).
|
||||||
|
|
||||||
|
1. TEI Embedding Service
|
||||||
|
|
||||||
|
```bash
|
||||||
|
curl ${host_ip}:6040/embed \
|
||||||
|
-X POST \
|
||||||
|
-d '{"inputs":"What is Deep Learning?"}' \
|
||||||
|
-H 'Content-Type: application/json'
|
||||||
|
```
|
||||||
|
|
||||||
|
2. Retriever Microservice
|
||||||
|
|
||||||
|
To consume the retriever microservice, you need to generate a mock embedding vector with a Python one-liner. The length of the embedding vector
|
||||||
|
is determined by the embedding model.
|
||||||
|
Here we use the model `EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"`, whose vector size is 768.
|
||||||
|
|
||||||
|
Check the vector dimension of your embedding model and set the `your_embedding` dimension to match it.
|
||||||
|
|
||||||
|
```bash
|
||||||
|
export your_embedding=$(python3 -c "import random; embedding = [random.uniform(-1, 1) for _ in range(768)]; print(embedding)")
|
||||||
|
curl http://${host_ip}:6045/v1/retrieval \
|
||||||
|
-X POST \
|
||||||
|
-d '{"text":"What is the revenue of Nike in 2023?","embedding":"'"${your_embedding}"'"}' \
|
||||||
|
-H 'Content-Type: application/json'
|
||||||
|
```
|
||||||
|
|
||||||
|
3. TEI Reranking Service
|
||||||
|
|
||||||
|
```bash
|
||||||
|
curl http://${host_ip}:6041/rerank \
|
||||||
|
-X POST \
|
||||||
|
-d '{"query":"What is Deep Learning?", "texts": ["Deep Learning is not...", "Deep learning is..."]}' \
|
||||||
|
-H 'Content-Type: application/json'
|
||||||
|
```
|
||||||
|
|
||||||
|
4. LLM Backend Service
|
||||||
|
|
||||||
|
On first startup, this service takes extra time to download, load, and warm up the model. Once that finishes, the service is ready.
|
||||||
|
|
||||||
|
Try the command below to check whether the LLM service is ready.
|
||||||
|
|
||||||
|
```bash
|
||||||
|
docker logs vllm-service 2>&1 | grep complete
|
||||||
|
```
|
||||||
|
|
||||||
|
If the service is ready, you will see a log line like the one below.
|
||||||
|
|
||||||
|
```text
|
||||||
|
INFO: Application startup complete.
|
||||||
|
```
|
||||||
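If you prefer to wait automatically instead of re-checking the logs by hand, a small polling loop works as well; this is only a sketch, reusing the container name and log message shown above:

```bash
# Poll the vLLM container logs until the model has finished loading.
until docker logs vllm-service 2>&1 | grep -q "Application startup complete"; do
  echo "Waiting for vllm-service to finish loading the model..."
  sleep 10
done
```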
|
|
||||||
|
Then try the `cURL` command below to validate the vLLM service.
|
||||||
|
|
||||||
|
```bash
|
||||||
|
curl http://${host_ip}:6042/v1/chat/completions \
|
||||||
|
-X POST \
|
||||||
|
-d '{"model": "meta-llama/Meta-Llama-3-8B-Instruct", "messages": [{"role": "user", "content": "What is Deep Learning?"}], "max_tokens":17}' \
|
||||||
|
-H 'Content-Type: application/json'
|
||||||
|
```
|
||||||
|
|
||||||
|
5. MegaService
|
||||||
|
|
||||||
|
```bash
|
||||||
|
curl http://${host_ip}:8912/v1/chatqna -H "Content-Type: application/json" -d '{
|
||||||
|
"messages": "What is the revenue of Nike in 2023?"
|
||||||
|
}'
|
||||||
|
```
|
||||||
|
|
||||||
|
6. Dataprep Microservice (Optional)
|
||||||
|
|
||||||
|
If you want to update the default knowledge base, you can use the following commands:
|
||||||
|
|
||||||
|
Update Knowledge Base via Local File Upload:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
curl -X POST "http://${host_ip}:6043/v1/dataprep/ingest" \
|
||||||
|
-H "Content-Type: multipart/form-data" \
|
||||||
|
-F "files=@./your_file.pdf"
|
||||||
|
```
|
||||||
|
|
||||||
|
This command updates a knowledge base by uploading a local file for processing. Update the file path according to your environment.
|
||||||
|
|
||||||
|
Add Knowledge Base via HTTP Links:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
curl -X POST "http://${host_ip}:6043/v1/dataprep/ingest" \
|
||||||
|
-H "Content-Type: multipart/form-data" \
|
||||||
|
-F 'link_list=["https://opea.dev"]'
|
||||||
|
```
|
||||||
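To check which documents have been ingested, the dataprep service also exposes a `get` endpoint (the path comes from the dataprep endpoints referenced elsewhere in this change; the request method and port 6043 are assumptions here):

```bash
# List files currently stored in the knowledge base (assumed POST endpoint).
curl -X POST "http://${host_ip}:6043/v1/dataprep/get" \
     -H "Content-Type: application/json"
```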
|
|
||||||
|
## Launch the UI
|
||||||
|
|
||||||
|
To access the frontend, open the following URL in your browser: http://{host_ip}:5173. By default, the UI runs on port 5173 internally. If you prefer to use a different host port to access the frontend, you can modify the port mapping in the `compose_mariadb.yaml` file as shown below:
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
chatqna-xeon-ui-server:
|
||||||
|
image: opea/chatqna-ui:latest
|
||||||
|
...
|
||||||
|
ports:
|
||||||
|
- "80:5173"
|
||||||
|
```
|
||||||
|
|
||||||
|

|
||||||
|
|
||||||
|
Here is an example of running ChatQnA:
|
||||||
|
|
||||||
|

|
||||||
185
ChatQnA/docker_compose/intel/cpu/xeon/compose_mariadb.yaml
Normal file
185
ChatQnA/docker_compose/intel/cpu/xeon/compose_mariadb.yaml
Normal file
@@ -0,0 +1,185 @@
|
|||||||
|
# Copyright (C) 2025 MariaDB Foundation
|
||||||
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
|
services:
|
||||||
|
mariadb-server:
|
||||||
|
image: mariadb:latest
|
||||||
|
container_name: mariadb-server
|
||||||
|
ports:
|
||||||
|
- "3306:3306"
|
||||||
|
environment:
|
||||||
|
- MARIADB_DATABASE=${MARIADB_DATABASE}
|
||||||
|
- MARIADB_USER=${MARIADB_USER}
|
||||||
|
- MARIADB_PASSWORD=${MARIADB_PASSWORD}
|
||||||
|
- MARIADB_RANDOM_ROOT_PASSWORD=1
|
||||||
|
healthcheck:
|
||||||
|
test: ["CMD", "healthcheck.sh", "--connect", "--innodb_initialized"]
|
||||||
|
start_period: 10s
|
||||||
|
interval: 10s
|
||||||
|
timeout: 5s
|
||||||
|
retries: 3
|
||||||
|
dataprep-mariadb-vector:
|
||||||
|
image: ${REGISTRY:-opea}/dataprep:${TAG:-latest}
|
||||||
|
container_name: dataprep-mariadb-vector
|
||||||
|
depends_on:
|
||||||
|
mariadb-server:
|
||||||
|
condition: service_healthy
|
||||||
|
tei-embedding-service:
|
||||||
|
condition: service_started
|
||||||
|
ports:
|
||||||
|
- "6007:5000"
|
||||||
|
environment:
|
||||||
|
no_proxy: ${no_proxy}
|
||||||
|
http_proxy: ${http_proxy}
|
||||||
|
https_proxy: ${https_proxy}
|
||||||
|
DATAPREP_COMPONENT_NAME: "OPEA_DATAPREP_MARIADBVECTOR"
|
||||||
|
MARIADB_CONNECTION_URL: mariadb+mariadbconnector://${MARIADB_USER}:${MARIADB_PASSWORD}@mariadb-server:3306/${MARIADB_DATABASE}
|
||||||
|
TEI_ENDPOINT: http://tei-embedding-service:80
|
||||||
|
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||||
|
healthcheck:
|
||||||
|
test: ["CMD-SHELL", "curl -f http://localhost:5000/v1/health_check || exit 1"]
|
||||||
|
interval: 10s
|
||||||
|
timeout: 5s
|
||||||
|
retries: 50
|
||||||
|
restart: unless-stopped
|
||||||
|
tei-embedding-service:
|
||||||
|
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
|
||||||
|
container_name: tei-embedding-server
|
||||||
|
ports:
|
||||||
|
- "6006:80"
|
||||||
|
volumes:
|
||||||
|
- "${MODEL_CACHE:-./data}:/data"
|
||||||
|
shm_size: 1g
|
||||||
|
environment:
|
||||||
|
no_proxy: ${no_proxy}
|
||||||
|
http_proxy: ${http_proxy}
|
||||||
|
https_proxy: ${https_proxy}
|
||||||
|
command: --model-id ${EMBEDDING_MODEL_ID} --auto-truncate
|
||||||
|
retriever:
|
||||||
|
image: ${REGISTRY:-opea}/retriever:${TAG:-latest}
|
||||||
|
container_name: retriever-mariadb-vector
|
||||||
|
depends_on:
|
||||||
|
mariadb-server:
|
||||||
|
condition: service_healthy
|
||||||
|
ports:
|
||||||
|
- "7000:7000"
|
||||||
|
ipc: host
|
||||||
|
environment:
|
||||||
|
no_proxy: ${no_proxy}
|
||||||
|
http_proxy: ${http_proxy}
|
||||||
|
https_proxy: ${https_proxy}
|
||||||
|
MARIADB_CONNECTION_URL: mariadb+mariadbconnector://${MARIADB_USER}:${MARIADB_PASSWORD}@mariadb-server:3306/${MARIADB_DATABASE}
|
||||||
|
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||||
|
LOGFLAG: ${LOGFLAG}
|
||||||
|
RETRIEVER_COMPONENT_NAME: "OPEA_RETRIEVER_MARIADBVECTOR"
|
||||||
|
restart: unless-stopped
|
||||||
|
tei-reranking-service:
|
||||||
|
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
|
||||||
|
container_name: tei-reranking-server
|
||||||
|
ports:
|
||||||
|
- "8808:80"
|
||||||
|
volumes:
|
||||||
|
- "${MODEL_CACHE:-./data}:/data"
|
||||||
|
shm_size: 1g
|
||||||
|
environment:
|
||||||
|
no_proxy: ${no_proxy}
|
||||||
|
http_proxy: ${http_proxy}
|
||||||
|
https_proxy: ${https_proxy}
|
||||||
|
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||||
|
HF_HUB_DISABLE_PROGRESS_BARS: 1
|
||||||
|
HF_HUB_ENABLE_HF_TRANSFER: 0
|
||||||
|
command: --model-id ${RERANK_MODEL_ID} --auto-truncate
|
||||||
|
vllm-service:
|
||||||
|
image: ${REGISTRY:-opea}/vllm:${TAG:-latest}
|
||||||
|
container_name: vllm-service
|
||||||
|
ports:
|
||||||
|
- "9009:80"
|
||||||
|
volumes:
|
||||||
|
- "${MODEL_CACHE:-./data}:/root/.cache/huggingface/hub"
|
||||||
|
shm_size: 128g
|
||||||
|
environment:
|
||||||
|
no_proxy: ${no_proxy}
|
||||||
|
http_proxy: ${http_proxy}
|
||||||
|
https_proxy: ${https_proxy}
|
||||||
|
HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||||
|
LLM_MODEL_ID: ${LLM_MODEL_ID}
|
||||||
|
VLLM_TORCH_PROFILER_DIR: "/mnt"
|
||||||
|
VLLM_CPU_KVCACHE_SPACE: 40
|
||||||
|
healthcheck:
|
||||||
|
test: ["CMD-SHELL", "curl -f http://$host_ip:9009/health || exit 1"]
|
||||||
|
interval: 10s
|
||||||
|
timeout: 10s
|
||||||
|
retries: 100
|
||||||
|
command: --model $LLM_MODEL_ID --host 0.0.0.0 --port 80
|
||||||
|
chatqna-xeon-backend-server:
|
||||||
|
image: ${REGISTRY:-opea}/chatqna:${TAG:-latest}
|
||||||
|
container_name: chatqna-xeon-backend-server
|
||||||
|
depends_on:
|
||||||
|
mariadb-server:
|
||||||
|
condition: service_healthy
|
||||||
|
dataprep-mariadb-vector:
|
||||||
|
condition: service_healthy
|
||||||
|
tei-embedding-service:
|
||||||
|
condition: service_started
|
||||||
|
retriever:
|
||||||
|
condition: service_started
|
||||||
|
tei-reranking-service:
|
||||||
|
condition: service_started
|
||||||
|
vllm-service:
|
||||||
|
condition: service_healthy
|
||||||
|
ports:
|
||||||
|
- "8888:8888"
|
||||||
|
environment:
|
||||||
|
- no_proxy=${no_proxy}
|
||||||
|
- https_proxy=${https_proxy}
|
||||||
|
- http_proxy=${http_proxy}
|
||||||
|
- MEGA_SERVICE_HOST_IP=chatqna-xeon-backend-server
|
||||||
|
- EMBEDDING_SERVER_HOST_IP=tei-embedding-service
|
||||||
|
- EMBEDDING_SERVER_PORT=${EMBEDDING_SERVER_PORT:-80}
|
||||||
|
- RETRIEVER_SERVICE_HOST_IP=retriever
|
||||||
|
- RERANK_SERVER_HOST_IP=tei-reranking-service
|
||||||
|
- RERANK_SERVER_PORT=${RERANK_SERVER_PORT:-80}
|
||||||
|
- LLM_SERVER_HOST_IP=vllm-service
|
||||||
|
- LLM_SERVER_PORT=80
|
||||||
|
- LLM_MODEL=${LLM_MODEL_ID}
|
||||||
|
- LOGFLAG=${LOGFLAG}
|
||||||
|
ipc: host
|
||||||
|
restart: always
|
||||||
|
chatqna-xeon-ui-server:
|
||||||
|
image: ${REGISTRY:-opea}/chatqna-ui:${TAG:-latest}
|
||||||
|
container_name: chatqna-xeon-ui-server
|
||||||
|
depends_on:
|
||||||
|
- chatqna-xeon-backend-server
|
||||||
|
ports:
|
||||||
|
- "5173:5173"
|
||||||
|
environment:
|
||||||
|
- no_proxy=${no_proxy}
|
||||||
|
- https_proxy=${https_proxy}
|
||||||
|
- http_proxy=${http_proxy}
|
||||||
|
ipc: host
|
||||||
|
restart: always
|
||||||
|
chatqna-xeon-nginx-server:
|
||||||
|
image: ${REGISTRY:-opea}/nginx:${TAG:-latest}
|
||||||
|
container_name: chatqna-xeon-nginx-server
|
||||||
|
depends_on:
|
||||||
|
- chatqna-xeon-backend-server
|
||||||
|
- chatqna-xeon-ui-server
|
||||||
|
ports:
|
||||||
|
- "${NGINX_PORT:-80}:80"
|
||||||
|
environment:
|
||||||
|
- no_proxy=${no_proxy}
|
||||||
|
- https_proxy=${https_proxy}
|
||||||
|
- http_proxy=${http_proxy}
|
||||||
|
- FRONTEND_SERVICE_IP=chatqna-xeon-ui-server
|
||||||
|
- FRONTEND_SERVICE_PORT=5173
|
||||||
|
- BACKEND_SERVICE_NAME=chatqna
|
||||||
|
- BACKEND_SERVICE_IP=chatqna-xeon-backend-server
|
||||||
|
- BACKEND_SERVICE_PORT=8888
|
||||||
|
- DATAPREP_SERVICE_IP=dataprep-mariadb-vector
|
||||||
|
- DATAPREP_SERVICE_PORT=5000
|
||||||
|
ipc: host
|
||||||
|
restart: always
|
||||||
|
|
||||||
|
networks:
|
||||||
|
default:
|
||||||
|
driver: bridge
|
||||||
@@ -1,6 +1,8 @@
|
|||||||
# Copyright (C) 2025 Intel Corporation
|
# Copyright (C) 2025 Intel Corporation
|
||||||
# SPDX-License-Identifier: Apache-2.0
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
rm *.json
|
if ls *.json 1> /dev/null 2>&1; then
|
||||||
|
rm *.json
|
||||||
|
fi
|
||||||
wget https://raw.githubusercontent.com/opea-project/GenAIEval/refs/heads/main/evals/benchmark/grafana/chatqna_megaservice_grafana.json
|
wget https://raw.githubusercontent.com/opea-project/GenAIEval/refs/heads/main/evals/benchmark/grafana/chatqna_megaservice_grafana.json
|
||||||
wget https://raw.githubusercontent.com/opea-project/GenAIEval/refs/heads/main/evals/benchmark/grafana/qdrant_grafana.json
|
wget https://raw.githubusercontent.com/opea-project/GenAIEval/refs/heads/main/evals/benchmark/grafana/qdrant_grafana.json
|
||||||
wget https://raw.githubusercontent.com/opea-project/GenAIEval/refs/heads/main/evals/benchmark/grafana/milvus_grafana.json
|
wget https://raw.githubusercontent.com/opea-project/GenAIEval/refs/heads/main/evals/benchmark/grafana/milvus_grafana.json
|
||||||
|
|||||||
@@ -7,6 +7,9 @@ pushd "../../../../../" > /dev/null
|
|||||||
source .set_env.sh
|
source .set_env.sh
|
||||||
popd > /dev/null
|
popd > /dev/null
|
||||||
|
|
||||||
|
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
|
||||||
|
export HF_TOKEN=${HF_TOKEN}
|
||||||
|
export host_ip=${ip_address}
|
||||||
export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
|
export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
|
||||||
export RERANK_MODEL_ID="BAAI/bge-reranker-base"
|
export RERANK_MODEL_ID="BAAI/bge-reranker-base"
|
||||||
export LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct"
|
export LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct"
|
||||||
|
|||||||
25
ChatQnA/docker_compose/intel/cpu/xeon/set_env_mariadb.sh
Executable file
25
ChatQnA/docker_compose/intel/cpu/xeon/set_env_mariadb.sh
Executable file
@@ -0,0 +1,25 @@
|
|||||||
|
#!/usr/bin/env bash
|
||||||
|
|
||||||
|
# Copyright (C) 2025 MariaDB Foundation
|
||||||
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
|
pushd "../../../../../" > /dev/null
|
||||||
|
source .set_env.sh
|
||||||
|
popd > /dev/null
|
||||||
|
|
||||||
|
if [ -z "${HUGGINGFACEHUB_API_TOKEN}" ]; then
|
||||||
|
echo "Error: HUGGINGFACEHUB_API_TOKEN is not set. Please set HUGGINGFACEHUB_API_TOKEN."
|
||||||
|
fi
|
||||||
|
|
||||||
|
export host_ip=$(hostname -I | awk '{print $1}')
|
||||||
|
export MARIADB_DATABASE="vectordb"
|
||||||
|
export MARIADB_USER="chatqna"
|
||||||
|
export MARIADB_PASSWORD="password"
|
||||||
|
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
|
||||||
|
export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
|
||||||
|
export RERANK_MODEL_ID="BAAI/bge-reranker-base"
|
||||||
|
export LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct"
|
||||||
|
export LOGFLAG=""
|
||||||
|
export no_proxy="$no_proxy,chatqna-xeon-ui-server,chatqna-xeon-backend-server,dataprep-redis-service,tei-embedding-service,retriever,tei-reranking-service,tgi-service,vllm-service,jaeger,prometheus,grafana,node-exporter"
|
||||||
|
export LLM_SERVER_PORT=9000
|
||||||
|
export NGINX_PORT=80
|
||||||
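A minimal usage sketch for this script, assuming it is sourced from `ChatQnA/docker_compose/intel/cpu/xeon/` before bringing up the MariaDB deployment described in the new README:

```bash
export HUGGINGFACEHUB_API_TOKEN="Your_Huggingface_API_Token"
source ./set_env_mariadb.sh
docker compose -f compose_mariadb.yaml up -d
```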
@@ -43,7 +43,7 @@ Some HuggingFace resources, such as some models, are only accessible if you have
|
|||||||
|
|
||||||
### Configure the Deployment Environment
|
### Configure the Deployment Environment
|
||||||
|
|
||||||
To set up environment variables for deploying ChatQnA services, source the _setup_env.sh_ script in this directory:
|
To set up environment variables for deploying ChatQnA services, source the _set_env.sh_ script in this directory (if using FaqGen or Guardrails, source _set_env_faqgen.sh_ instead):
|
||||||
|
|
||||||
```
|
```
|
||||||
source ./set_env.sh
|
source ./set_env.sh
|
||||||
|
|||||||
@@ -4,12 +4,20 @@
|
|||||||
# SPDX-License-Identifier: Apache-2.0
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
# Function to prompt for input and set environment variables
|
# Function to prompt for input and set environment variables
|
||||||
|
NON_INTERACTIVE=${NON_INTERACTIVE:-false}
|
||||||
|
|
||||||
prompt_for_env_var() {
|
prompt_for_env_var() {
|
||||||
local var_name="$1"
|
local var_name="$1"
|
||||||
local prompt_message="$2"
|
local prompt_message="$2"
|
||||||
local default_value="$3"
|
local default_value="$3"
|
||||||
local mandatory="$4"
|
local mandatory="$4"
|
||||||
|
|
||||||
|
if [[ "$NON_INTERACTIVE" == "true" ]]; then
|
||||||
|
echo "Non-interactive environment detected. Setting $var_name to default: $default_value"
|
||||||
|
export "$var_name"="$default_value"
|
||||||
|
return
|
||||||
|
fi
|
||||||
|
|
||||||
if [[ "$mandatory" == "true" ]]; then
|
if [[ "$mandatory" == "true" ]]; then
|
||||||
while [[ -z "$value" ]]; do
|
while [[ -z "$value" ]]; do
|
||||||
read -p "$prompt_message [default: \"${default_value}\"]: " value
|
read -p "$prompt_message [default: \"${default_value}\"]: " value
|
||||||
@@ -34,7 +42,7 @@ popd > /dev/null
|
|||||||
|
|
||||||
# Prompt the user for each required environment variable
|
# Prompt the user for each required environment variable
|
||||||
prompt_for_env_var "EMBEDDING_MODEL_ID" "Enter the EMBEDDING_MODEL_ID" "BAAI/bge-base-en-v1.5" false
|
prompt_for_env_var "EMBEDDING_MODEL_ID" "Enter the EMBEDDING_MODEL_ID" "BAAI/bge-base-en-v1.5" false
|
||||||
prompt_for_env_var "HUGGINGFACEHUB_API_TOKEN" "Enter the HUGGINGFACEHUB_API_TOKEN" "" true
|
prompt_for_env_var "HUGGINGFACEHUB_API_TOKEN" "Enter the HUGGINGFACEHUB_API_TOKEN" "${HF_TOKEN}" true
|
||||||
prompt_for_env_var "RERANK_MODEL_ID" "Enter the RERANK_MODEL_ID" "BAAI/bge-reranker-base" false
|
prompt_for_env_var "RERANK_MODEL_ID" "Enter the RERANK_MODEL_ID" "BAAI/bge-reranker-base" false
|
||||||
prompt_for_env_var "LLM_MODEL_ID" "Enter the LLM_MODEL_ID" "meta-llama/Meta-Llama-3-8B-Instruct" false
|
prompt_for_env_var "LLM_MODEL_ID" "Enter the LLM_MODEL_ID" "meta-llama/Meta-Llama-3-8B-Instruct" false
|
||||||
prompt_for_env_var "INDEX_NAME" "Enter the INDEX_NAME" "rag-redis" false
|
prompt_for_env_var "INDEX_NAME" "Enter the INDEX_NAME" "rag-redis" false
|
||||||
@@ -42,34 +50,40 @@ prompt_for_env_var "NUM_CARDS" "Enter the number of Gaudi devices" "1" false
|
|||||||
prompt_for_env_var "host_ip" "Enter the host_ip" "$(curl ifconfig.me)" false
|
prompt_for_env_var "host_ip" "Enter the host_ip" "$(curl ifconfig.me)" false
|
||||||
|
|
||||||
#Query for enabling http_proxy
|
#Query for enabling http_proxy
|
||||||
prompt_for_env_var "http_proxy" "Enter the http_proxy." "" false
|
prompt_for_env_var "http_proxy" "Enter the http_proxy." "${http_proxy}" false
|
||||||
|
|
||||||
#Query for enabling https_proxy
|
#Query for enabling https_proxy
|
||||||
prompt_for_env_var "https_proxy" "Enter the https_proxy." "" false
|
prompt_for_env_var "http_proxy" "Enter the http_proxy." "${https_proxy}" false
|
||||||
|
|
||||||
#Query for enabling no_proxy
|
#Query for enabling no_proxy
|
||||||
prompt_for_env_var "no_proxy" "Enter the no_proxy." "" false
|
prompt_for_env_var "no_proxy" "Enter the no_proxy." "${no_proxy}" false
|
||||||
|
|
||||||
# Query for enabling logging
|
# Query for enabling logging
|
||||||
read -p "Enable logging? (yes/no): " logging && logging=$(echo "$logging" | tr '[:upper:]' '[:lower:]')
|
if [[ "$NON_INTERACTIVE" == "true" ]]; then
|
||||||
if [[ "$logging" == "yes" || "$logging" == "y" ]]; then
|
# Query for enabling logging
|
||||||
export LOGFLAG=true
|
prompt_for_env_var "LOGFLAG" "Enable logging? (yes/no): " "true" false
|
||||||
|
export JAEGER_IP=$(ip route get 8.8.8.8 | grep -oP 'src \K[^ ]+')
|
||||||
|
export OTEL_EXPORTER_OTLP_TRACES_ENDPOINT=grpc://$JAEGER_IP:4317
|
||||||
|
export TELEMETRY_ENDPOINT=http://$JAEGER_IP:4318/v1/traces
|
||||||
|
telemetry_flag=true
|
||||||
else
|
else
|
||||||
export LOGFLAG=false
|
# Query for enabling logging
|
||||||
fi
|
read -p "Enable logging? (yes/no): " logging && logging=$(echo "$logging" | tr '[:upper:]' '[:lower:]')
|
||||||
|
if [[ "$logging" == "yes" || "$logging" == "y" ]]; then
|
||||||
# Query for enabling OpenTelemetry Tracing Endpoint
|
export LOGFLAG=true
|
||||||
read -p "Enable OpenTelemetry Tracing Endpoint? (yes/no): " telemetry && telemetry=$(echo "$telemetry" | tr '[:upper:]' '[:lower:]')
|
else
|
||||||
if [[ "$telemetry" == "yes" || "$telemetry" == "y" ]]; then
|
export LOGFLAG=false
|
||||||
export JAEGER_IP=$(ip route get 8.8.8.8 | grep -oP 'src \K[^ ]+')
|
fi
|
||||||
export OTEL_EXPORTER_OTLP_TRACES_ENDPOINT=grpc://$JAEGER_IP:4317
|
# Query for enabling OpenTelemetry Tracing Endpoint
|
||||||
export TELEMETRY_ENDPOINT=http://$JAEGER_IP:4318/v1/traces
|
read -p "Enable OpenTelemetry Tracing Endpoint? (yes/no): " telemetry && telemetry=$(echo "$telemetry" | tr '[:upper:]' '[:lower:]')
|
||||||
telemetry_flag=true
|
if [[ "$telemetry" == "yes" || "$telemetry" == "y" ]]; then
|
||||||
pushd "grafana/dashboards" > /dev/null
|
export JAEGER_IP=$(ip route get 8.8.8.8 | grep -oP 'src \K[^ ]+')
|
||||||
source download_opea_dashboard.sh
|
export OTEL_EXPORTER_OTLP_TRACES_ENDPOINT=grpc://$JAEGER_IP:4317
|
||||||
popd > /dev/null
|
export TELEMETRY_ENDPOINT=http://$JAEGER_IP:4318/v1/traces
|
||||||
else
|
telemetry_flag=true
|
||||||
telemetry_flag=false
|
else
|
||||||
|
telemetry_flag=false
|
||||||
|
fi
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# Generate the .env file
|
# Generate the .env file
|
||||||
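With `NON_INTERACTIVE` defaulting to `false`, interactive behavior is unchanged, while CI or scripted runs can skip the prompts and accept the defaults. A minimal sketch, assuming the script is the Gaudi `set_env.sh` being modified here:

```bash
# Accept all defaults without prompting (useful in CI pipelines).
NON_INTERACTIVE=true source ./set_env.sh
```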
|
|||||||
32
ChatQnA/docker_compose/intel/hpu/gaudi/set_env_faqgen.sh
Executable file
32
ChatQnA/docker_compose/intel/hpu/gaudi/set_env_faqgen.sh
Executable file
@@ -0,0 +1,32 @@
|
|||||||
|
#!/usr/bin/env bash
|
||||||
|
|
||||||
|
# Copyright (C) 2024 Intel Corporation
|
||||||
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
|
pushd "../../../../../" > /dev/null
|
||||||
|
source .set_env.sh
|
||||||
|
popd > /dev/null
|
||||||
|
|
||||||
|
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
|
||||||
|
export HF_TOKEN=${HF_TOKEN}
|
||||||
|
export host_ip=${ip_address}
|
||||||
|
export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
|
||||||
|
export RERANK_MODEL_ID="BAAI/bge-reranker-base"
|
||||||
|
export LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct"
|
||||||
|
export INDEX_NAME="rag-redis"
|
||||||
|
export NUM_CARDS=1
|
||||||
|
export VLLM_SKIP_WARMUP=true
|
||||||
|
export LOGFLAG=True
|
||||||
|
export http_proxy=${http_proxy}
|
||||||
|
export https_proxy=${https_proxy}
|
||||||
|
export no_proxy="${ip_address},redis-vector-db,dataprep-redis-service,tei-embedding-service,retriever,tei-reranking-service,tgi-service,vllm-service,guardrails,llm-faqgen,chatqna-gaudi-backend-server,chatqna-gaudi-ui-server,chatqna-gaudi-nginx-server"
|
||||||
|
|
||||||
|
export LLM_ENDPOINT_PORT=8010
|
||||||
|
export LLM_SERVER_PORT=9001
|
||||||
|
export CHATQNA_BACKEND_PORT=8888
|
||||||
|
export CHATQNA_REDIS_VECTOR_PORT=6377
|
||||||
|
export CHATQNA_REDIS_VECTOR_INSIGHT_PORT=8006
|
||||||
|
export CHATQNA_FRONTEND_SERVICE_PORT=5175
|
||||||
|
export NGINX_PORT=80
|
||||||
|
export FAQGen_COMPONENT_NAME="OpeaFaqGenvLLM"
|
||||||
|
export LLM_ENDPOINT="http://${host_ip}:${LLM_ENDPOINT_PORT}"
|
||||||
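A minimal usage sketch for this script, mirroring the updated FaqGen test flow later in this change:

```bash
cd ChatQnA/docker_compose/intel/hpu/gaudi
source set_env_faqgen.sh
docker compose -f compose_faqgen.yaml up -d
```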
123
ChatQnA/tests/README.md
Normal file
123
ChatQnA/tests/README.md
Normal file
@@ -0,0 +1,123 @@
|
|||||||
|
# ChatQnA E2E test scripts
|
||||||
|
|
||||||
|
## Set the required environment variable
|
||||||
|
|
||||||
|
```bash
|
||||||
|
export HUGGINGFACEHUB_API_TOKEN="Your_Huggingface_API_Token"
|
||||||
|
```
|
||||||
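The scripts are meant to be run from the `ChatQnA/tests` directory; a minimal end-to-end invocation, with the directory layout assumed from the repository structure:

```bash
cd GenAIExamples/ChatQnA/tests
export HUGGINGFACEHUB_API_TOKEN="Your_Huggingface_API_Token"
bash test_compose_mariadb_on_xeon.sh
```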
|
|
||||||
|
## Run test
|
||||||
|
|
||||||
|
On Intel Xeon with TGI:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
bash test_compose_tgi_on_xeon.sh
|
||||||
|
```
|
||||||
|
|
||||||
|
On Intel Xeon with vLLM:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
bash test_compose_on_xeon.sh
|
||||||
|
```
|
||||||
|
|
||||||
|
On Intel Xeon with MariaDB Vector:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
bash test_compose_mariadb_on_xeon.sh
|
||||||
|
```
|
||||||
|
|
||||||
|
On Intel Xeon with Pinecone:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
bash test_compose_pinecone_on_xeon.sh
|
||||||
|
```
|
||||||
|
|
||||||
|
On Intel Xeon with Milvus:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
bash test_compose_milvus_on_xeon.sh
|
||||||
|
```
|
||||||
|
|
||||||
|
On Intel Xeon with Qdrant:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
bash test_compose_qdrant_on_xeon.sh
|
||||||
|
```
|
||||||
|
|
||||||
|
On Intel Xeon without Rerank:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
bash test_compose_without_rerank_on_xeon.sh
|
||||||
|
```
|
||||||
|
|
||||||
|
On Intel Gaudi with TGI:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
bash test_compose_tgi_on_gaudi.sh
|
||||||
|
```
|
||||||
|
|
||||||
|
On Intel Gaudi with vLLM:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
bash test_compose_on_gaudi.sh
|
||||||
|
```
|
||||||
|
|
||||||
|
On Intel Gaudi with Guardrails:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
bash test_compose_guardrails_on_gaudi.sh
|
||||||
|
```
|
||||||
|
|
||||||
|
On Intel Gaudi without Rerank:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
bash test_compose_without_rerank_on_gaudi.sh
|
||||||
|
```
|
||||||
|
|
||||||
|
On AMD ROCm with TGI:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
bash test_compose_on_rocm.sh
|
||||||
|
```
|
||||||
|
|
||||||
|
On AMD ROCm with vLLM:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
bash test_compose_vllm_on_rocm.sh
|
||||||
|
```
|
||||||
|
|
||||||
|
Test FAQ Generation On Intel Xeon with TGI:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
bash test_compose_faqgen_tgi_on_xeon.sh
|
||||||
|
```
|
||||||
|
|
||||||
|
Test FAQ Generation On Intel Xeon with vLLM:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
bash test_compose_faqgen_on_xeon.sh
|
||||||
|
```
|
||||||
|
|
||||||
|
Test FAQ Generation On Intel Gaudi with TGI:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
bash test_compose_faqgen_tgi_on_gaudi.sh
|
||||||
|
```
|
||||||
|
|
||||||
|
Test FAQ Generation On Intel Gaudi with vLLM:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
bash test_compose_faqgen_on_gaudi.sh
|
||||||
|
```
|
||||||
|
|
||||||
|
Test FAQ Generation On AMD ROCm with TGI:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
bash test_compose_faqgen_on_rocm.sh
|
||||||
|
```
|
||||||
|
|
||||||
|
Test FAQ Generation On AMD ROCm with vLLM:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
bash test_compose_faqgen_vllm_on_rocm.sh
|
||||||
|
```
|
||||||
@@ -24,8 +24,8 @@ function build_docker_images() {
|
|||||||
docker build --no-cache -t ${REGISTRY}/comps-base:${TAG} --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile .
|
docker build --no-cache -t ${REGISTRY}/comps-base:${TAG} --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile .
|
||||||
popd && sleep 1s
|
popd && sleep 1s
|
||||||
git clone https://github.com/HabanaAI/vllm-fork.git && cd vllm-fork
|
git clone https://github.com/HabanaAI/vllm-fork.git && cd vllm-fork
|
||||||
VLLM_VER=v0.6.6.post1+Gaudi-1.20.0
|
VLLM_FORK_VER=v0.6.6.post1+Gaudi-1.20.0
|
||||||
git checkout ${VLLM_VER} &> /dev/null && cd ../
|
git checkout ${VLLM_FORK_VER} &> /dev/null && cd ../
|
||||||
|
|
||||||
echo "Build all the images with --no-cache, check docker_image_build.log for details..."
|
echo "Build all the images with --no-cache, check docker_image_build.log for details..."
|
||||||
service_list="chatqna chatqna-ui dataprep retriever llm-faqgen vllm-gaudi nginx"
|
service_list="chatqna chatqna-ui dataprep retriever llm-faqgen vllm-gaudi nginx"
|
||||||
@@ -36,27 +36,7 @@ function build_docker_images() {
|
|||||||
|
|
||||||
function start_services() {
|
function start_services() {
|
||||||
cd $WORKPATH/docker_compose/intel/hpu/gaudi
|
cd $WORKPATH/docker_compose/intel/hpu/gaudi
|
||||||
export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
|
source set_env_faqgen.sh
|
||||||
export RERANK_MODEL_ID="BAAI/bge-reranker-base"
|
|
||||||
export LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct"
|
|
||||||
export NUM_CARDS=1
|
|
||||||
export INDEX_NAME="rag-redis"
|
|
||||||
export host_ip=${ip_address}
|
|
||||||
export LLM_ENDPOINT_PORT=8010
|
|
||||||
export LLM_SERVER_PORT=9001
|
|
||||||
export CHATQNA_BACKEND_PORT=8888
|
|
||||||
export CHATQNA_REDIS_VECTOR_PORT=6377
|
|
||||||
export CHATQNA_REDIS_VECTOR_INSIGHT_PORT=8006
|
|
||||||
export CHATQNA_FRONTEND_SERVICE_PORT=5175
|
|
||||||
export NGINX_PORT=80
|
|
||||||
export FAQGen_COMPONENT_NAME="OpeaFaqGenvLLM"
|
|
||||||
export LLM_ENDPOINT="http://${host_ip}:${LLM_ENDPOINT_PORT}"
|
|
||||||
export HF_TOKEN=${HF_TOKEN}
|
|
||||||
export VLLM_SKIP_WARMUP=true
|
|
||||||
export LOGFLAG=True
|
|
||||||
export http_proxy=${http_proxy}
|
|
||||||
export https_proxy=${https_proxy}
|
|
||||||
export no_proxy="${ip_address},redis-vector-db,dataprep-redis-service,tei-embedding-service,retriever,tei-reranking-service,tgi-service,vllm-service,guardrails,llm-faqgen,chatqna-gaudi-backend-server,chatqna-gaudi-ui-server,chatqna-gaudi-nginx-server"
|
|
||||||
|
|
||||||
# Start Docker Containers
|
# Start Docker Containers
|
||||||
docker compose -f compose_faqgen.yaml up -d > ${LOG_PATH}/start_services_with_compose.log
|
docker compose -f compose_faqgen.yaml up -d > ${LOG_PATH}/start_services_with_compose.log
|
||||||
|
|||||||
@@ -15,44 +15,7 @@ WORKPATH=$(dirname "$PWD")
|
|||||||
LOG_PATH="$WORKPATH/tests"
|
LOG_PATH="$WORKPATH/tests"
|
||||||
ip_address=$(hostname -I | awk '{print $1}')
|
ip_address=$(hostname -I | awk '{print $1}')
|
||||||
|
|
||||||
export HOST_IP=${ip_address}
|
source $WORKPATH/docker_compose/amd/gpu/rocm/set_env_faqgen.sh
|
||||||
export HOST_IP_EXTERNAL=${ip_address}
|
|
||||||
|
|
||||||
export CHATQNA_EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
|
|
||||||
export CHATQNA_HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
|
|
||||||
export CHATQNA_LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct"
|
|
||||||
export CHATQNA_RERANK_MODEL_ID="BAAI/bge-reranker-base"
|
|
||||||
|
|
||||||
export CHATQNA_BACKEND_SERVICE_PORT=8888
|
|
||||||
export CHATQNA_FRONTEND_SERVICE_PORT=5173
|
|
||||||
export CHATQNA_LLM_FAQGEN_PORT=18011
|
|
||||||
export CHATQNA_NGINX_PORT=80
|
|
||||||
export CHATQNA_REDIS_DATAPREP_PORT=18103
|
|
||||||
export CHATQNA_REDIS_RETRIEVER_PORT=7000
|
|
||||||
export CHATQNA_REDIS_VECTOR_INSIGHT_PORT=8001
|
|
||||||
export CHATQNA_REDIS_VECTOR_PORT=6379
|
|
||||||
export CHATQNA_TEI_EMBEDDING_PORT=18090
|
|
||||||
export CHATQNA_TEI_RERANKING_PORT=18808
|
|
||||||
export CHATQNA_TGI_SERVICE_PORT=18008
|
|
||||||
|
|
||||||
export CHATQNA_BACKEND_SERVICE_ENDPOINT="http://${HOST_IP_EXTERNAL}:${CHATQNA_BACKEND_SERVICE_PORT}/v1/chatqna"
|
|
||||||
export CHATQNA_BACKEND_SERVICE_IP=${HOST_IP}
|
|
||||||
export CHATQNA_DATAPREP_DELETE_FILE_ENDPOINT="http://${HOST_IP_EXTERNAL}:${CHATQNA_REDIS_DATAPREP_PORT}/v1/dataprep/delete"
|
|
||||||
export CHATQNA_DATAPREP_GET_FILE_ENDPOINT="http://${HOST_IP_EXTERNAL}:${CHATQNA_REDIS_DATAPREP_PORT}/v1/dataprep/get"
|
|
||||||
export CHATQNA_DATAPREP_SERVICE_ENDPOINT="http://${HOST_IP_EXTERNAL}:${CHATQNA_REDIS_DATAPREP_PORT}/v1/dataprep/ingest"
|
|
||||||
export CHATQNA_EMBEDDING_SERVICE_HOST_IP=${HOST_IP}
|
|
||||||
export CHATQNA_FRONTEND_SERVICE_IP=${HOST_IP}
|
|
||||||
export CHATQNA_LLM_SERVICE_HOST_IP=${HOST_IP}
|
|
||||||
export CHATQNA_LLM_ENDPOINT="http://${HOST_IP}:${CHATQNA_TGI_SERVICE_PORT}"
|
|
||||||
export CHATQNA_MEGA_SERVICE_HOST_IP=${HOST_IP}
|
|
||||||
export CHATQNA_REDIS_URL="redis://${HOST_IP}:${CHATQNA_REDIS_VECTOR_PORT}"
|
|
||||||
export CHATQNA_RERANK_SERVICE_HOST_IP=${HOST_IP}
|
|
||||||
export CHATQNA_RETRIEVER_SERVICE_HOST_IP=${HOST_IP}
|
|
||||||
export CHATQNA_TEI_EMBEDDING_ENDPOINT="http://${HOST_IP}:${CHATQNA_TEI_EMBEDDING_PORT}"
|
|
||||||
|
|
||||||
export CHATQNA_BACKEND_SERVICE_NAME=chatqna
|
|
||||||
export CHATQNA_INDEX_NAME="rag-redis"
|
|
||||||
export FAQGen_COMPONENT_NAME="OpeaFaqGenTgi"
|
|
||||||
|
|
||||||
export PATH="~/miniconda3/bin:$PATH"
|
export PATH="~/miniconda3/bin:$PATH"
|
||||||
|
|
||||||
|
|||||||
@@ -37,26 +37,16 @@ function build_docker_images() {
|
|||||||
|
|
||||||
function start_services() {
|
function start_services() {
|
||||||
cd $WORKPATH/docker_compose/intel/cpu/xeon
|
cd $WORKPATH/docker_compose/intel/cpu/xeon
|
||||||
export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
|
|
||||||
export RERANK_MODEL_ID="BAAI/bge-reranker-base"
|
|
||||||
export LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct"
|
|
||||||
export INDEX_NAME="rag-redis"
|
|
||||||
export host_ip=${ip_address}
|
|
||||||
export LLM_ENDPOINT_PORT=8010
|
|
||||||
export LLM_SERVER_PORT=9001
|
export LLM_SERVER_PORT=9001
|
||||||
export CHATQNA_BACKEND_PORT=8888
|
|
||||||
export CHATQNA_REDIS_VECTOR_PORT=6377
|
export CHATQNA_REDIS_VECTOR_PORT=6377
|
||||||
export CHATQNA_REDIS_VECTOR_INSIGHT_PORT=8006
|
export CHATQNA_REDIS_VECTOR_INSIGHT_PORT=8006
|
||||||
export CHATQNA_FRONTEND_SERVICE_PORT=5175
|
export CHATQNA_FRONTEND_SERVICE_PORT=5175
|
||||||
export NGINX_PORT=80
|
|
||||||
export FAQGen_COMPONENT_NAME="OpeaFaqGenvLLM"
|
|
||||||
export LLM_ENDPOINT="http://${host_ip}:${LLM_ENDPOINT_PORT}"
|
|
||||||
export HF_TOKEN=${HF_TOKEN}
|
|
||||||
export VLLM_SKIP_WARMUP=true
|
export VLLM_SKIP_WARMUP=true
|
||||||
export LOGFLAG=True
|
export LOGFLAG=True
|
||||||
export http_proxy=${http_proxy}
|
export http_proxy=${http_proxy}
|
||||||
export https_proxy=${https_proxy}
|
export https_proxy=${https_proxy}
|
||||||
export no_proxy="${ip_address},redis-vector-db,dataprep-redis-service,tei-embedding-service,retriever,tei-reranking-service,tgi-service,vllm-service,guardrails,llm-faqgen,chatqna-xeon-backend-server,chatqna-xeon-ui-server,chatqna-xeon-nginx-server"
|
export no_proxy="${ip_address},redis-vector-db,dataprep-redis-service,tei-embedding-service,retriever,tei-reranking-service,tgi-service,vllm-service,guardrails,llm-faqgen,chatqna-xeon-backend-server,chatqna-xeon-ui-server,chatqna-xeon-nginx-server"
|
||||||
|
source set_env.sh
|
||||||
|
|
||||||
# Start Docker Containers
|
# Start Docker Containers
|
||||||
docker compose -f compose_faqgen.yaml up -d > ${LOG_PATH}/start_services_with_compose.log
|
docker compose -f compose_faqgen.yaml up -d > ${LOG_PATH}/start_services_with_compose.log
|
||||||
|
|||||||
@@ -33,25 +33,8 @@ function build_docker_images() {
|
|||||||
|
|
||||||
function start_services() {
|
function start_services() {
|
||||||
cd $WORKPATH/docker_compose/intel/hpu/gaudi
|
cd $WORKPATH/docker_compose/intel/hpu/gaudi
|
||||||
export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
|
|
||||||
export RERANK_MODEL_ID="BAAI/bge-reranker-base"
|
|
||||||
export LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct"
|
|
||||||
export INDEX_NAME="rag-redis"
|
|
||||||
export host_ip=${ip_address}
|
|
||||||
export LLM_ENDPOINT_PORT=8010
|
|
||||||
export LLM_SERVER_PORT=9001
|
|
||||||
export CHATQNA_BACKEND_PORT=8888
|
|
||||||
export CHATQNA_REDIS_VECTOR_PORT=6377
|
|
||||||
export CHATQNA_REDIS_VECTOR_INSIGHT_PORT=8006
|
|
||||||
export CHATQNA_FRONTEND_SERVICE_PORT=5175
|
|
||||||
export NGINX_PORT=80
|
|
||||||
export FAQGen_COMPONENT_NAME="OpeaFaqGenTgi"
|
export FAQGen_COMPONENT_NAME="OpeaFaqGenTgi"
|
||||||
export LLM_ENDPOINT="http://${host_ip}:${LLM_ENDPOINT_PORT}"
|
source set_env_faqgen.sh
|
||||||
export HF_TOKEN=${HF_TOKEN}
|
|
||||||
export LOGFLAG=True
|
|
||||||
export http_proxy=${http_proxy}
|
|
||||||
export https_proxy=${https_proxy}
|
|
||||||
export no_proxy="${ip_address},redis-vector-db,dataprep-redis-service,tei-embedding-service,retriever,tei-reranking-service,tgi-service,vllm-service,guardrails,llm-faqgen,chatqna-gaudi-backend-server,chatqna-gaudi-ui-server,chatqna-gaudi-nginx-server"
|
|
||||||
|
|
||||||
# Start Docker Containers
|
# Start Docker Containers
|
||||||
docker compose -f compose_faqgen_tgi.yaml up -d > ${LOG_PATH}/start_services_with_compose.log
|
docker compose -f compose_faqgen_tgi.yaml up -d > ${LOG_PATH}/start_services_with_compose.log
|
||||||
|
|||||||
@@ -37,25 +37,16 @@ function build_docker_images() {
|
|||||||
|
|
||||||
function start_services() {
|
function start_services() {
|
||||||
cd $WORKPATH/docker_compose/intel/cpu/xeon
|
cd $WORKPATH/docker_compose/intel/cpu/xeon
|
||||||
export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
|
|
||||||
export RERANK_MODEL_ID="BAAI/bge-reranker-base"
|
|
||||||
export LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct"
|
|
||||||
export INDEX_NAME="rag-redis"
|
|
||||||
export host_ip=${ip_address}
|
|
||||||
export LLM_ENDPOINT_PORT=8010
|
|
||||||
export LLM_SERVER_PORT=9001
|
export LLM_SERVER_PORT=9001
|
||||||
export CHATQNA_BACKEND_PORT=8888
|
|
||||||
export CHATQNA_REDIS_VECTOR_PORT=6377
|
export CHATQNA_REDIS_VECTOR_PORT=6377
|
||||||
export CHATQNA_REDIS_VECTOR_INSIGHT_PORT=8006
|
export CHATQNA_REDIS_VECTOR_INSIGHT_PORT=8006
|
||||||
export CHATQNA_FRONTEND_SERVICE_PORT=5175
|
export CHATQNA_FRONTEND_SERVICE_PORT=5175
|
||||||
export NGINX_PORT=80
|
|
||||||
export FAQGen_COMPONENT_NAME="OpeaFaqGenTgi"
|
export FAQGen_COMPONENT_NAME="OpeaFaqGenTgi"
|
||||||
export LLM_ENDPOINT="http://${host_ip}:${LLM_ENDPOINT_PORT}"
|
|
||||||
export HF_TOKEN=${HF_TOKEN}
|
|
||||||
export LOGFLAG=True
|
export LOGFLAG=True
|
||||||
export http_proxy=${http_proxy}
|
export http_proxy=${http_proxy}
|
||||||
export https_proxy=${https_proxy}
|
export https_proxy=${https_proxy}
|
||||||
export no_proxy="${ip_address},redis-vector-db,dataprep-redis-service,tei-embedding-service,retriever,tei-reranking-service,tgi-service,vllm-service,guardrails,llm-faqgen,chatqna-xeon-backend-server,chatqna-xeon-ui-server,chatqna-xeon-nginx-server"
|
export no_proxy="${ip_address},redis-vector-db,dataprep-redis-service,tei-embedding-service,retriever,tei-reranking-service,tgi-service,vllm-service,guardrails,llm-faqgen,chatqna-xeon-backend-server,chatqna-xeon-ui-server,chatqna-xeon-nginx-server"
|
||||||
|
source set_env.sh
|
||||||
|
|
||||||
# Start Docker Containers
|
# Start Docker Containers
|
||||||
docker compose -f compose_faqgen_tgi.yaml up -d > ${LOG_PATH}/start_services_with_compose.log
|
docker compose -f compose_faqgen_tgi.yaml up -d > ${LOG_PATH}/start_services_with_compose.log
|
||||||
|
|||||||
@@ -14,41 +14,7 @@ WORKPATH=$(dirname "$PWD")
|
|||||||
LOG_PATH="$WORKPATH/tests"
|
LOG_PATH="$WORKPATH/tests"
|
||||||
ip_address=$(hostname -I | awk '{print $1}')
|
ip_address=$(hostname -I | awk '{print $1}')
|
||||||
|
|
||||||
export HOST_IP=${ip_address}
|
source $WORKPATH/docker_compose/amd/gpu/rocm/set_env_faqgen_vllm.sh
|
||||||
export HOST_IP_EXTERNAL=${ip_address}
|
|
||||||
|
|
||||||
export CHATQNA_EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
|
|
||||||
export CHATQNA_HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
|
|
||||||
export CHATQNA_LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct"
|
|
||||||
export CHATQNA_RERANK_MODEL_ID="BAAI/bge-reranker-base"
|
|
||||||
|
|
||||||
export CHATQNA_BACKEND_SERVICE_PORT=8888
|
|
||||||
export CHATQNA_FRONTEND_SERVICE_PORT=5173
|
|
||||||
export CHATQNA_LLM_FAQGEN_PORT=18011
|
|
||||||
export CHATQNA_NGINX_PORT=80
|
|
||||||
export CHATQNA_REDIS_DATAPREP_PORT=18103
|
|
||||||
export CHATQNA_REDIS_RETRIEVER_PORT=7000
|
|
||||||
export CHATQNA_REDIS_VECTOR_INSIGHT_PORT=8001
|
|
||||||
export CHATQNA_REDIS_VECTOR_PORT=6379
|
|
||||||
export CHATQNA_TEI_EMBEDDING_PORT=18090
|
|
||||||
export CHATQNA_TEI_RERANKING_PORT=18808
|
|
||||||
export CHATQNA_VLLM_SERVICE_PORT=18008
|
|
||||||
|
|
||||||
export CHATQNA_BACKEND_SERVICE_ENDPOINT="http://${HOST_IP_EXTERNAL}:${CHATQNA_BACKEND_SERVICE_PORT}/v1/chatqna"
|
|
||||||
export CHATQNA_BACKEND_SERVICE_IP=${HOST_IP_EXTERNAL}
|
|
||||||
export CHATQNA_DATAPREP_DELETE_FILE_ENDPOINT="http://${HOST_IP_EXTERNAL}:${CHATQNA_REDIS_DATAPREP_PORT}/v1/dataprep/delete"
|
|
||||||
export CHATQNA_DATAPREP_GET_FILE_ENDPOINT="http://${HOST_IP_EXTERNAL}:${CHATQNA_REDIS_DATAPREP_PORT}/v1/dataprep/get"
|
|
||||||
export CHATQNA_DATAPREP_SERVICE_ENDPOINT="http://${HOST_IP_EXTERNAL}:${CHATQNA_REDIS_DATAPREP_PORT}/v1/dataprep/ingest"
|
|
||||||
export CHATQNA_FRONTEND_SERVICE_IP=${HOST_IP}
|
|
||||||
export CHATQNA_MEGA_SERVICE_HOST_IP=${HOST_IP}
|
|
||||||
export CHATQNA_REDIS_URL="redis://${HOST_IP}:${CHATQNA_REDIS_VECTOR_PORT}"
|
|
||||||
export CHATQNA_TEI_EMBEDDING_ENDPOINT="http://${HOST_IP}:${CHATQNA_TEI_EMBEDDING_PORT}"
|
|
||||||
export LLM_ENDPOINT="http://${HOST_IP}:${CHATQNA_VLLM_SERVICE_PORT}"
|
|
||||||
|
|
||||||
export CHATQNA_BACKEND_SERVICE_NAME=chatqna
|
|
||||||
export CHATQNA_INDEX_NAME="rag-redis"
|
|
||||||
export CHATQNA_TYPE="CHATQNA_FAQGEN"
|
|
||||||
export FAQGen_COMPONENT_NAME="OpeaFaqGenvLLM"
|
|
||||||
|
|
||||||
function build_docker_images() {
|
function build_docker_images() {
|
||||||
opea_branch=${opea_branch:-"main"}
|
opea_branch=${opea_branch:-"main"}
|
||||||
|
|||||||
@@ -2,7 +2,7 @@
|
|||||||
# Copyright (C) 2024 Intel Corporation
|
# Copyright (C) 2024 Intel Corporation
|
||||||
# SPDX-License-Identifier: Apache-2.0
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
set -e
|
set -xe
|
||||||
IMAGE_REPO=${IMAGE_REPO:-"opea"}
|
IMAGE_REPO=${IMAGE_REPO:-"opea"}
|
||||||
IMAGE_TAG=${IMAGE_TAG:-"latest"}
|
IMAGE_TAG=${IMAGE_TAG:-"latest"}
|
||||||
echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}"
|
echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}"
|
||||||
@@ -24,8 +24,8 @@ function build_docker_images() {
|
|||||||
docker build --no-cache -t ${REGISTRY}/comps-base:${TAG} --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile .
|
docker build --no-cache -t ${REGISTRY}/comps-base:${TAG} --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile .
|
||||||
popd && sleep 1s
|
popd && sleep 1s
|
||||||
git clone https://github.com/HabanaAI/vllm-fork.git && cd vllm-fork
|
git clone https://github.com/HabanaAI/vllm-fork.git && cd vllm-fork
|
||||||
VLLM_VER=v0.6.6.post1+Gaudi-1.20.0
|
VLLM_FORK_VER=v0.6.6.post1+Gaudi-1.20.0
|
||||||
git checkout ${VLLM_VER} &> /dev/null && cd ../
|
git checkout ${VLLM_FORK_VER} &> /dev/null && cd ../
|
||||||
|
|
||||||
echo "Build all the images with --no-cache, check docker_image_build.log for details..."
|
echo "Build all the images with --no-cache, check docker_image_build.log for details..."
|
||||||
service_list="chatqna chatqna-ui dataprep retriever vllm-gaudi guardrails nginx"
|
service_list="chatqna chatqna-ui dataprep retriever vllm-gaudi guardrails nginx"
|
||||||
@@ -36,14 +36,8 @@ function build_docker_images() {
|
|||||||
|
|
||||||
function start_services() {
|
function start_services() {
|
||||||
cd $WORKPATH/docker_compose/intel/hpu/gaudi
|
cd $WORKPATH/docker_compose/intel/hpu/gaudi
|
||||||
export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
|
|
||||||
export RERANK_MODEL_ID="BAAI/bge-reranker-base"
|
|
||||||
export LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct"
|
|
||||||
export NUM_CARDS=1
|
|
||||||
export INDEX_NAME="rag-redis"
|
|
||||||
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
|
|
||||||
export host_ip=${ip_address}
|
|
||||||
export GURADRAILS_MODEL_ID="meta-llama/Meta-Llama-Guard-2-8B"
|
export GURADRAILS_MODEL_ID="meta-llama/Meta-Llama-Guard-2-8B"
|
||||||
|
source set_env_faqgen.sh
|
||||||
|
|
||||||
# Start Docker Containers
|
# Start Docker Containers
|
||||||
docker compose -f compose_guardrails.yaml up -d > ${LOG_PATH}/start_services_with_compose.log
|
docker compose -f compose_guardrails.yaml up -d > ${LOG_PATH}/start_services_with_compose.log
|
||||||
|
|||||||
170
ChatQnA/tests/test_compose_mariadb_on_xeon.sh
Normal file
170
ChatQnA/tests/test_compose_mariadb_on_xeon.sh
Normal file
@@ -0,0 +1,170 @@
|
|||||||
|
#!/bin/bash
|
||||||
|
# Copyright (C) 2025 MariaDB Foundation
|
||||||
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
|
set -xe
|
||||||
|
IMAGE_REPO=${IMAGE_REPO:-"opea"}
|
||||||
|
IMAGE_TAG=${IMAGE_TAG:-"latest"}
|
||||||
|
echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}"
|
||||||
|
echo "TAG=IMAGE_TAG=${IMAGE_TAG}"
|
||||||
|
export REGISTRY=${IMAGE_REPO}
|
||||||
|
export TAG=${IMAGE_TAG}
|
||||||
|
export MODEL_CACHE=${model_cache:-"./data"}
|
||||||
|
|
||||||
|
WORKPATH=$(dirname "$PWD")
|
||||||
|
LOG_PATH="$WORKPATH/tests"
|
||||||
|
ip_address=$(hostname -I | awk '{print $1}')
|
||||||
|
|
||||||
|
function build_docker_images() {
|
||||||
|
opea_branch=${opea_branch:-"main"}
|
||||||
|
cd $WORKPATH/docker_image_build
|
||||||
|
git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git
|
||||||
|
pushd GenAIComps
|
||||||
|
echo "GenAIComps test commit is $(git rev-parse HEAD)"
|
||||||
|
docker build --no-cache -t ${REGISTRY}/comps-base:${TAG} --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile .
|
||||||
|
popd && sleep 1s
|
||||||
|
git clone https://github.com/vllm-project/vllm.git && cd vllm
|
||||||
|
VLLM_VER="v0.8.3"
|
||||||
|
echo "Check out vLLM tag ${VLLM_VER}"
|
||||||
|
git checkout ${VLLM_VER} &> /dev/null
|
||||||
|
# make sure NOT change the pwd
|
||||||
|
cd ../
|
||||||
|
|
||||||
|
echo "Build all the images with --no-cache, check docker_image_build.log for details..."
|
||||||
|
service_list="chatqna chatqna-ui dataprep retriever vllm nginx"
|
||||||
|
docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log
|
||||||
|
|
||||||
|
docker images && sleep 1s
|
||||||
|
}
|
||||||
|
|
||||||
|
function start_services() {
|
||||||
|
cd $WORKPATH/docker_compose/intel/cpu/xeon
|
||||||
|
export MARIADB_PASSWORD="test"
|
||||||
|
source set_env_mariadb.sh
|
||||||
|
|
||||||
|
# Start Docker Containers
|
||||||
|
docker compose -f compose_mariadb.yaml up -d > ${LOG_PATH}/start_services_with_compose.log
|
||||||
|
n=0
|
||||||
|
until [[ "$n" -ge 100 ]]; do
|
||||||
|
docker logs vllm-service > ${LOG_PATH}/vllm_service_start.log 2>&1
|
||||||
|
if grep -q complete ${LOG_PATH}/vllm_service_start.log; then
|
||||||
|
break
|
||||||
|
fi
|
||||||
|
sleep 5s
|
||||||
|
n=$((n+1))
|
||||||
|
done
|
||||||
|
}
|
||||||
|
|
||||||
|
function validate_service() {
|
||||||
|
local URL="$1"
|
||||||
|
local EXPECTED_RESULT="$2"
|
||||||
|
local SERVICE_NAME="$3"
|
||||||
|
local DOCKER_NAME="$4"
|
||||||
|
local INPUT_DATA="$5"
|
||||||
|
|
||||||
|
local HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL")
|
||||||
|
if [ "$HTTP_STATUS" -eq 200 ]; then
|
||||||
|
echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..."
|
||||||
|
|
||||||
|
local CONTENT=$(curl -s -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL" | tee ${LOG_PATH}/${SERVICE_NAME}.log)
|
||||||
|
|
||||||
|
if echo "$CONTENT" | grep -q "$EXPECTED_RESULT"; then
|
||||||
|
echo "[ $SERVICE_NAME ] Content is as expected."
|
||||||
|
else
|
||||||
|
echo "[ $SERVICE_NAME ] Content does not match the expected result: $CONTENT"
|
||||||
|
docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
else
|
||||||
|
echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS"
|
||||||
|
docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
sleep 1s
|
||||||
|
}
|
||||||
|
|
||||||
|
function validate_microservices() {
|
||||||
|
# Check if the microservices are running correctly.
|
||||||
|
sleep 3m
|
||||||
|
|
||||||
|
# tei for embedding service
|
||||||
|
validate_service \
|
||||||
|
"${ip_address}:6006/embed" \
|
||||||
|
"\[\[" \
|
||||||
|
"tei-embedding" \
|
||||||
|
"tei-embedding-server" \
|
||||||
|
'{"inputs":"What is Deep Learning?"}'
|
||||||
|
|
||||||
|
# retrieval microservice
|
||||||
|
test_embedding=$(python3 -c "import random; embedding = [random.uniform(-1, 1) for _ in range(768)]; print(embedding)")
|
||||||
|
validate_service \
|
||||||
|
"${ip_address}:7000/v1/retrieval" \
|
||||||
|
" " \
|
||||||
|
"retrieval" \
|
||||||
|
"retriever-mariadb-vector" \
|
||||||
|
"{\"text\":\"What is the revenue of Nike in 2023?\",\"embedding\":${test_embedding}}"
|
||||||
|
|
||||||
|
# tei for rerank microservice
|
||||||
|
validate_service \
|
||||||
|
"${ip_address}:8808/rerank" \
|
||||||
|
'{"index":1,"score":' \
|
||||||
|
"tei-rerank" \
|
||||||
|
"tei-reranking-server" \
|
||||||
|
'{"query":"What is Deep Learning?", "texts": ["Deep Learning is not...", "Deep learning is..."]}'
|
||||||
|
|
||||||
|
# vllm for llm service
|
||||||
|
validate_service \
|
||||||
|
"${ip_address}:9009/v1/chat/completions" \
|
||||||
|
"content" \
|
||||||
|
"vllm-llm" \
|
||||||
|
"vllm-service" \
|
||||||
|
'{"model": "meta-llama/Meta-Llama-3-8B-Instruct", "messages": [{"role": "user", "content": "What is Deep Learning?"}], "max_tokens": 17}'
|
||||||
|
}
|
||||||
|
|
||||||
|
function validate_megaservice() {
|
||||||
|
# Curl the Mega Service
|
||||||
|
validate_service \
|
||||||
|
"${ip_address}:8888/v1/chatqna" \
|
||||||
|
"Nike" \
|
||||||
|
"mega-chatqna" \
|
||||||
|
"chatqna-xeon-backend-server" \
|
||||||
|
'{"messages": "What is the revenue of Nike in 2023?"}'
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
function stop_docker() {
|
||||||
|
cd $WORKPATH/docker_compose/intel/cpu/xeon
|
||||||
|
docker compose -f compose_mariadb.yaml down
|
||||||
|
}
|
||||||
|
|
||||||
|
function main() {
|
||||||
|
|
||||||
|
echo "::group::stop_docker"
|
||||||
|
stop_docker
|
||||||
|
echo "::endgroup::"
|
||||||
|
|
||||||
|
echo "::group::build_docker_images"
|
||||||
|
if [[ "$IMAGE_REPO" == "opea" ]]; then build_docker_images; fi
|
||||||
|
echo "::endgroup::"
|
||||||
|
|
||||||
|
echo "::group::start_services"
|
||||||
|
start_services
|
||||||
|
echo "::endgroup::"
|
||||||
|
|
||||||
|
echo "::group::validate_microservices"
|
||||||
|
validate_microservices
|
||||||
|
echo "::endgroup::"
|
||||||
|
|
||||||
|
echo "::group::validate_megaservice"
|
||||||
|
validate_megaservice
|
||||||
|
echo "::endgroup::"
|
||||||
|
|
||||||
|
echo "::group::stop_docker"
|
||||||
|
stop_docker
|
||||||
|
echo "::endgroup::"
|
||||||
|
|
||||||
|
docker system prune -f
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
main
|
||||||
@@ -2,7 +2,7 @@
|
|||||||
# Copyright (C) 2024 Intel Corporation
|
# Copyright (C) 2024 Intel Corporation
|
||||||
# SPDX-License-Identifier: Apache-2.0
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
set -e
|
set -xe
|
||||||
IMAGE_REPO=${IMAGE_REPO:-"opea"}
|
IMAGE_REPO=${IMAGE_REPO:-"opea"}
|
||||||
IMAGE_TAG=${IMAGE_TAG:-"latest"}
|
IMAGE_TAG=${IMAGE_TAG:-"latest"}
|
||||||
echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}"
|
echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}"
|
||||||
@@ -39,11 +39,8 @@ function build_docker_images() {
|
|||||||
}
|
}
|
||||||
function start_services() {
|
function start_services() {
|
||||||
cd $WORKPATH/docker_compose/intel/cpu/xeon/
|
cd $WORKPATH/docker_compose/intel/cpu/xeon/
|
||||||
export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
|
|
||||||
export RERANK_MODEL_ID="BAAI/bge-reranker-base"
|
|
||||||
export LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct"
|
|
||||||
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
|
|
||||||
export LOGFLAG=true
|
export LOGFLAG=true
|
||||||
|
source set_env.sh
|
||||||
|
|
||||||
# Start Docker Containers
|
# Start Docker Containers
|
||||||
docker compose -f compose_milvus.yaml up -d > ${LOG_PATH}/start_services_with_compose.log
|
docker compose -f compose_milvus.yaml up -d > ${LOG_PATH}/start_services_with_compose.log
|
||||||
|
|||||||
@@ -2,7 +2,7 @@
|
|||||||
# Copyright (C) 2024 Intel Corporation
|
# Copyright (C) 2024 Intel Corporation
|
||||||
# SPDX-License-Identifier: Apache-2.0
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
set -e
|
set -xe
|
||||||
IMAGE_REPO=${IMAGE_REPO:-"opea"}
|
IMAGE_REPO=${IMAGE_REPO:-"opea"}
|
||||||
IMAGE_TAG=${IMAGE_TAG:-"latest"}
|
IMAGE_TAG=${IMAGE_TAG:-"latest"}
|
||||||
echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}"
|
echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}"
|
||||||
@@ -24,8 +24,8 @@ function build_docker_images() {
|
|||||||
docker build --no-cache -t ${REGISTRY}/comps-base:${TAG} --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile .
|
docker build --no-cache -t ${REGISTRY}/comps-base:${TAG} --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile .
|
||||||
popd && sleep 1s
|
popd && sleep 1s
|
||||||
git clone https://github.com/HabanaAI/vllm-fork.git && cd vllm-fork
|
git clone https://github.com/HabanaAI/vllm-fork.git && cd vllm-fork
|
||||||
VLLM_VER=v0.6.6.post1+Gaudi-1.20.0
|
VLLM_FORK_VER=v0.6.6.post1+Gaudi-1.20.0
|
||||||
git checkout ${VLLM_VER} &> /dev/null && cd ../
|
git checkout ${VLLM_FORK_VER} &> /dev/null && cd ../
|
||||||
|
|
||||||
echo "Build all the images with --no-cache, check docker_image_build.log for details..."
|
echo "Build all the images with --no-cache, check docker_image_build.log for details..."
|
||||||
service_list="chatqna chatqna-ui dataprep retriever vllm-gaudi nginx"
|
service_list="chatqna chatqna-ui dataprep retriever vllm-gaudi nginx"
|
||||||
@@ -36,16 +36,10 @@ function build_docker_images() {
|
|||||||
|
|
||||||
function start_services() {
|
function start_services() {
|
||||||
cd $WORKPATH/docker_compose/intel/hpu/gaudi
|
cd $WORKPATH/docker_compose/intel/hpu/gaudi
|
||||||
export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
|
export NON_INTERACTIVE=true
|
||||||
export RERANK_MODEL_ID="BAAI/bge-reranker-base"
|
|
||||||
export LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct"
|
|
||||||
export NUM_CARDS=1
|
|
||||||
export INDEX_NAME="rag-redis"
|
|
||||||
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
|
|
||||||
export host_ip=${ip_address}
|
export host_ip=${ip_address}
|
||||||
export JAEGER_IP=$(ip route get 8.8.8.8 | grep -oP 'src \K[^ ]+')
|
export telemetry=yes
|
||||||
export OTEL_EXPORTER_OTLP_TRACES_ENDPOINT=grpc://$JAEGER_IP:4317
|
source set_env.sh
|
||||||
export TELEMETRY_ENDPOINT=http://$JAEGER_IP:4318/v1/traces
|
|
||||||
|
|
||||||
# Start Docker Containers
|
# Start Docker Containers
|
||||||
docker compose -f compose.yaml -f compose.telemetry.yaml up -d > ${LOG_PATH}/start_services_with_compose.log
|
docker compose -f compose.yaml -f compose.telemetry.yaml up -d > ${LOG_PATH}/start_services_with_compose.log
|
||||||
|
|||||||
@@ -15,41 +15,7 @@ WORKPATH=$(dirname "$PWD")
|
|||||||
LOG_PATH="$WORKPATH/tests"
|
LOG_PATH="$WORKPATH/tests"
|
||||||
ip_address=$(hostname -I | awk '{print $1}')
|
ip_address=$(hostname -I | awk '{print $1}')
|
||||||
|
|
||||||
export HOST_IP=${ip_address}
|
source $WORKPATH/docker_compose/amd/gpu/rocm/set_env.sh
|
||||||
export HOST_IP_EXTERNAL=${ip_address}
|
|
||||||
|
|
||||||
export CHATQNA_EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
|
|
||||||
export CHATQNA_HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
|
|
||||||
export CHATQNA_LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct"
|
|
||||||
export CHATQNA_RERANK_MODEL_ID="BAAI/bge-reranker-base"
|
|
||||||
|
|
||||||
export CHATQNA_BACKEND_SERVICE_PORT=8888
|
|
||||||
export CHATQNA_FRONTEND_SERVICE_PORT=5173
|
|
||||||
export CHATQNA_NGINX_PORT=80
|
|
||||||
export CHATQNA_REDIS_DATAPREP_PORT=18103
|
|
||||||
export CHATQNA_REDIS_RETRIEVER_PORT=7000
|
|
||||||
export CHATQNA_REDIS_VECTOR_INSIGHT_PORT=8001
|
|
||||||
export CHATQNA_REDIS_VECTOR_PORT=6379
|
|
||||||
export CHATQNA_TEI_EMBEDDING_PORT=18090
|
|
||||||
export CHATQNA_TEI_RERANKING_PORT=18808
|
|
||||||
export CHATQNA_TGI_SERVICE_PORT=18008
|
|
||||||
|
|
||||||
export CHATQNA_BACKEND_SERVICE_ENDPOINT="http://${HOST_IP_EXTERNAL}:${CHATQNA_BACKEND_SERVICE_PORT}/v1/chatqna"
|
|
||||||
export CHATQNA_BACKEND_SERVICE_IP=${HOST_IP}
|
|
||||||
export CHATQNA_DATAPREP_DELETE_FILE_ENDPOINT="http://${HOST_IP_EXTERNAL}:${CHATQNA_REDIS_DATAPREP_PORT}/v1/dataprep/delete"
|
|
||||||
export CHATQNA_DATAPREP_GET_FILE_ENDPOINT="http://${HOST_IP_EXTERNAL}:${CHATQNA_REDIS_DATAPREP_PORT}/v1/dataprep/get"
|
|
||||||
export CHATQNA_DATAPREP_SERVICE_ENDPOINT="http://${HOST_IP_EXTERNAL}:${CHATQNA_REDIS_DATAPREP_PORT}/v1/dataprep/ingest"
|
|
||||||
export CHATQNA_EMBEDDING_SERVICE_HOST_IP=${HOST_IP}
|
|
||||||
export CHATQNA_FRONTEND_SERVICE_IP=${HOST_IP}
|
|
||||||
export CHATQNA_LLM_SERVICE_HOST_IP=${HOST_IP}
|
|
||||||
export CHATQNA_MEGA_SERVICE_HOST_IP=${HOST_IP}
|
|
||||||
export CHATQNA_REDIS_URL="redis://${HOST_IP}:${CHATQNA_REDIS_VECTOR_PORT}"
|
|
||||||
export CHATQNA_RERANK_SERVICE_HOST_IP=${HOST_IP}
|
|
||||||
export CHATQNA_RETRIEVER_SERVICE_HOST_IP=${HOST_IP}
|
|
||||||
export CHATQNA_TEI_EMBEDDING_ENDPOINT="http://${HOST_IP}:${CHATQNA_TEI_EMBEDDING_PORT}"
|
|
||||||
|
|
||||||
export CHATQNA_BACKEND_SERVICE_NAME=chatqna
|
|
||||||
export CHATQNA_INDEX_NAME="rag-redis"
|
|
||||||
|
|
||||||
export PATH="~/miniconda3/bin:$PATH"
|
export PATH="~/miniconda3/bin:$PATH"
|
||||||
|
|
||||||
|
|||||||
@@ -2,7 +2,7 @@
|
|||||||
# Copyright (C) 2024 Intel Corporation
|
# Copyright (C) 2024 Intel Corporation
|
||||||
# SPDX-License-Identifier: Apache-2.0
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
set -e
|
set -xe
|
||||||
IMAGE_REPO=${IMAGE_REPO:-"opea"}
|
IMAGE_REPO=${IMAGE_REPO:-"opea"}
|
||||||
IMAGE_TAG=${IMAGE_TAG:-"latest"}
|
IMAGE_TAG=${IMAGE_TAG:-"latest"}
|
||||||
echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}"
|
echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}"
|
||||||
@@ -40,15 +40,7 @@ function build_docker_images() {
|
|||||||
function start_services() {
|
function start_services() {
|
||||||
cd $WORKPATH/docker_compose/intel/cpu/xeon
|
cd $WORKPATH/docker_compose/intel/cpu/xeon
|
||||||
|
|
||||||
export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
|
source set_env.sh
|
||||||
export RERANK_MODEL_ID="BAAI/bge-reranker-base"
|
|
||||||
export LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct"
|
|
||||||
export INDEX_NAME="rag-redis"
|
|
||||||
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
|
|
||||||
export host_ip=${ip_address}
|
|
||||||
export JAEGER_IP=$(ip route get 8.8.8.8 | grep -oP 'src \K[^ ]+')
|
|
||||||
export OTEL_EXPORTER_OTLP_TRACES_ENDPOINT=grpc://$JAEGER_IP:4317
|
|
||||||
export TELEMETRY_ENDPOINT=http://$JAEGER_IP:4318/v1/traces
|
|
||||||
|
|
||||||
# Start Docker Containers
|
# Start Docker Containers
|
||||||
docker compose -f compose.yaml -f compose.telemetry.yaml up -d > ${LOG_PATH}/start_services_with_compose.log
|
docker compose -f compose.yaml -f compose.telemetry.yaml up -d > ${LOG_PATH}/start_services_with_compose.log
|
||||||
|
|||||||
@@ -2,7 +2,7 @@
|
|||||||
# Copyright (C) 2024 Intel Corporation
|
# Copyright (C) 2024 Intel Corporation
|
||||||
# SPDX-License-Identifier: Apache-2.0
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
set -e
|
set -xe
|
||||||
IMAGE_REPO=${IMAGE_REPO:-"opea"}
|
IMAGE_REPO=${IMAGE_REPO:-"opea"}
|
||||||
IMAGE_TAG=${IMAGE_TAG:-"latest"}
|
IMAGE_TAG=${IMAGE_TAG:-"latest"}
|
||||||
echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}"
|
echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}"
|
||||||
@@ -41,14 +41,11 @@ function build_docker_images() {
|
|||||||
function start_services() {
|
function start_services() {
|
||||||
cd $WORKPATH/docker_compose/intel/cpu/xeon/
|
cd $WORKPATH/docker_compose/intel/cpu/xeon/
|
||||||
export no_proxy=${no_proxy},${ip_address}
|
export no_proxy=${no_proxy},${ip_address}
|
||||||
export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
|
|
||||||
export RERANK_MODEL_ID="BAAI/bge-reranker-base"
|
|
||||||
export LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct"
|
|
||||||
export PINECONE_API_KEY=${PINECONE_KEY_LANGCHAIN_TEST}
|
export PINECONE_API_KEY=${PINECONE_KEY_LANGCHAIN_TEST}
|
||||||
export PINECONE_INDEX_NAME="langchain-test"
|
export PINECONE_INDEX_NAME="langchain-test"
|
||||||
export INDEX_NAME="langchain-test"
|
export INDEX_NAME="langchain-test"
|
||||||
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
|
|
||||||
export LOGFLAG=true
|
export LOGFLAG=true
|
||||||
|
source set_env.sh
|
||||||
|
|
||||||
# Start Docker Containers
|
# Start Docker Containers
|
||||||
docker compose -f compose_pinecone.yaml up -d > ${LOG_PATH}/start_services_with_compose.log
|
docker compose -f compose_pinecone.yaml up -d > ${LOG_PATH}/start_services_with_compose.log
|
||||||
|
|||||||
@@ -2,7 +2,7 @@
|
|||||||
# Copyright (C) 2024 Intel Corporation
|
# Copyright (C) 2024 Intel Corporation
|
||||||
# SPDX-License-Identifier: Apache-2.0
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
set -e
|
set -xe
|
||||||
IMAGE_REPO=${IMAGE_REPO:-"opea"}
|
IMAGE_REPO=${IMAGE_REPO:-"opea"}
|
||||||
IMAGE_TAG=${IMAGE_TAG:-"latest"}
|
IMAGE_TAG=${IMAGE_TAG:-"latest"}
|
||||||
echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}"
|
echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}"
|
||||||
@@ -40,11 +40,8 @@ function build_docker_images() {
|
|||||||
function start_services() {
|
function start_services() {
|
||||||
cd $WORKPATH/docker_compose/intel/cpu/xeon
|
cd $WORKPATH/docker_compose/intel/cpu/xeon
|
||||||
|
|
||||||
export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
|
|
||||||
export RERANK_MODEL_ID="BAAI/bge-reranker-base"
|
|
||||||
export LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct"
|
|
||||||
export INDEX_NAME="rag-qdrant"
|
export INDEX_NAME="rag-qdrant"
|
||||||
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
|
source set_env.sh
|
||||||
|
|
||||||
sed -i "s/backend_address/$ip_address/g" $WORKPATH/ui/svelte/.env
|
sed -i "s/backend_address/$ip_address/g" $WORKPATH/ui/svelte/.env
|
||||||
|
|
||||||
|
|||||||
@@ -2,7 +2,7 @@
|
|||||||
# Copyright (C) 2024 Intel Corporation
|
# Copyright (C) 2024 Intel Corporation
|
||||||
# SPDX-License-Identifier: Apache-2.0
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
set -e
|
set -xe
|
||||||
IMAGE_REPO=${IMAGE_REPO:-"opea"}
|
IMAGE_REPO=${IMAGE_REPO:-"opea"}
|
||||||
IMAGE_TAG=${IMAGE_TAG:-"latest"}
|
IMAGE_TAG=${IMAGE_TAG:-"latest"}
|
||||||
echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}"
|
echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}"
|
||||||
@@ -32,15 +32,10 @@ function build_docker_images() {
|
|||||||
|
|
||||||
function start_services() {
|
function start_services() {
|
||||||
cd $WORKPATH/docker_compose/intel/hpu/gaudi
|
cd $WORKPATH/docker_compose/intel/hpu/gaudi
|
||||||
export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
|
export NON_INTERACTIVE=true
|
||||||
export RERANK_MODEL_ID="BAAI/bge-reranker-base"
|
export host_ip=${ip_address}
|
||||||
export LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct"
|
export telemetry=yes
|
||||||
export NUM_CARDS=1
|
source set_env.sh
|
||||||
export INDEX_NAME="rag-redis"
|
|
||||||
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
|
|
||||||
export JAEGER_IP=$(ip route get 8.8.8.8 | grep -oP 'src \K[^ ]+')
|
|
||||||
export OTEL_EXPORTER_OTLP_TRACES_ENDPOINT=grpc://$JAEGER_IP:4317
|
|
||||||
export TELEMETRY_ENDPOINT=http://$JAEGER_IP:4318/v1/traces
|
|
||||||
|
|
||||||
# Start Docker Containers
|
# Start Docker Containers
|
||||||
docker compose -f compose_tgi.yaml -f compose_tgi.telemetry.yaml up -d > ${LOG_PATH}/start_services_with_compose.log
|
docker compose -f compose_tgi.yaml -f compose_tgi.telemetry.yaml up -d > ${LOG_PATH}/start_services_with_compose.log
|
||||||
|
|||||||
@@ -2,7 +2,7 @@
|
|||||||
# Copyright (C) 2024 Intel Corporation
|
# Copyright (C) 2024 Intel Corporation
|
||||||
# SPDX-License-Identifier: Apache-2.0
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
set -e
|
set -xe
|
||||||
IMAGE_REPO=${IMAGE_REPO:-"opea"}
|
IMAGE_REPO=${IMAGE_REPO:-"opea"}
|
||||||
IMAGE_TAG=${IMAGE_TAG:-"latest"}
|
IMAGE_TAG=${IMAGE_TAG:-"latest"}
|
||||||
echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}"
|
echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}"
|
||||||
@@ -33,14 +33,7 @@ function build_docker_images() {
|
|||||||
function start_services() {
|
function start_services() {
|
||||||
cd $WORKPATH/docker_compose/intel/cpu/xeon
|
cd $WORKPATH/docker_compose/intel/cpu/xeon
|
||||||
|
|
||||||
export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
|
source set_env.sh
|
||||||
export RERANK_MODEL_ID="BAAI/bge-reranker-base"
|
|
||||||
export LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct"
|
|
||||||
export INDEX_NAME="rag-redis"
|
|
||||||
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
|
|
||||||
export JAEGER_IP=$(ip route get 8.8.8.8 | grep -oP 'src \K[^ ]+')
|
|
||||||
export OTEL_EXPORTER_OTLP_TRACES_ENDPOINT=grpc://$JAEGER_IP:4317
|
|
||||||
export TELEMETRY_ENDPOINT=http://$JAEGER_IP:4318/v1/traces
|
|
||||||
|
|
||||||
# Start Docker Containers
|
# Start Docker Containers
|
||||||
docker compose -f compose_tgi.yaml -f compose_tgi.telemetry.yaml up -d > ${LOG_PATH}/start_services_with_compose.log
|
docker compose -f compose_tgi.yaml -f compose_tgi.telemetry.yaml up -d > ${LOG_PATH}/start_services_with_compose.log
|
||||||
|
|||||||
@@ -2,7 +2,7 @@
|
|||||||
# Copyright (C) 2024 Intel Corporation
|
# Copyright (C) 2024 Intel Corporation
|
||||||
# SPDX-License-Identifier: Apache-2.0
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
set -e
|
set -xe
|
||||||
IMAGE_REPO=${IMAGE_REPO:-"opea"}
|
IMAGE_REPO=${IMAGE_REPO:-"opea"}
|
||||||
IMAGE_TAG=${IMAGE_TAG:-"latest"}
|
IMAGE_TAG=${IMAGE_TAG:-"latest"}
|
||||||
echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}"
|
echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}"
|
||||||
@@ -14,42 +14,7 @@ WORKPATH=$(dirname "$PWD")
|
|||||||
LOG_PATH="$WORKPATH/tests"
|
LOG_PATH="$WORKPATH/tests"
|
||||||
ip_address=$(hostname -I | awk '{print $1}')
|
ip_address=$(hostname -I | awk '{print $1}')
|
||||||
|
|
||||||
export HOST_IP=${ip_address}
|
source $WORKPATH/docker_compose/amd/gpu/rocm/set_env_vllm.sh
|
||||||
export HOST_IP_EXTERNAL=${ip_address}
|
|
||||||
|
|
||||||
export CHATQNA_EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
|
|
||||||
export CHATQNA_HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
|
|
||||||
export CHATQNA_LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct"
|
|
||||||
export CHATQNA_RERANK_MODEL_ID="BAAI/bge-reranker-base"
|
|
||||||
|
|
||||||
export CHATQNA_BACKEND_SERVICE_PORT=8888
|
|
||||||
export CHATQNA_FRONTEND_SERVICE_PORT=5173
|
|
||||||
export CHATQNA_NGINX_PORT=80
|
|
||||||
export CHATQNA_REDIS_DATAPREP_PORT=18103
|
|
||||||
export CHATQNA_REDIS_RETRIEVER_PORT=7000
|
|
||||||
export CHATQNA_REDIS_VECTOR_INSIGHT_PORT=8001
|
|
||||||
export CHATQNA_REDIS_VECTOR_PORT=6379
|
|
||||||
export CHATQNA_TEI_EMBEDDING_PORT=18090
|
|
||||||
export CHATQNA_TEI_RERANKING_PORT=18808
|
|
||||||
export CHATQNA_VLLM_SERVICE_PORT=18008
|
|
||||||
|
|
||||||
export CHATQNA_BACKEND_SERVICE_ENDPOINT="http://${HOST_IP_EXTERNAL}:${CHATQNA_BACKEND_SERVICE_PORT}/v1/chatqna"
|
|
||||||
export CHATQNA_BACKEND_SERVICE_IP=${HOST_IP_EXTERNAL}
|
|
||||||
export CHATQNA_DATAPREP_DELETE_FILE_ENDPOINT="http://${HOST_IP_EXTERNAL}:${CHATQNA_REDIS_DATAPREP_PORT}/v1/dataprep/delete"
|
|
||||||
export CHATQNA_DATAPREP_GET_FILE_ENDPOINT="http://${HOST_IP_EXTERNAL}:${CHATQNA_REDIS_DATAPREP_PORT}/v1/dataprep/get"
|
|
||||||
export CHATQNA_DATAPREP_SERVICE_ENDPOINT="http://${HOST_IP_EXTERNAL}:${CHATQNA_REDIS_DATAPREP_PORT}/v1/dataprep/ingest"
|
|
||||||
export CHATQNA_EMBEDDING_SERVICE_HOST_IP=${HOST_IP}
|
|
||||||
export CHATQNA_FRONTEND_SERVICE_IP=${HOST_IP}
|
|
||||||
export CHATQNA_LLM_SERVICE_HOST_IP=${HOST_IP}
|
|
||||||
export CHATQNA_MEGA_SERVICE_HOST_IP=${HOST_IP}
|
|
||||||
export CHATQNA_REDIS_URL="redis://${HOST_IP}:${CHATQNA_REDIS_VECTOR_PORT}"
|
|
||||||
export CHATQNA_RERANK_SERVICE_HOST_IP=${HOST_IP}
|
|
||||||
export CHATQNA_RETRIEVER_SERVICE_HOST_IP=${HOST_IP}
|
|
||||||
export CHATQNA_TEI_EMBEDDING_ENDPOINT="http://${HOST_IP}:${CHATQNA_TEI_EMBEDDING_PORT}"
|
|
||||||
|
|
||||||
export CHATQNA_BACKEND_SERVICE_NAME=chatqna
|
|
||||||
export CHATQNA_INDEX_NAME="rag-redis"
|
|
||||||
|
|
||||||
|
|
||||||
function build_docker_images() {
|
function build_docker_images() {
|
||||||
opea_branch=${opea_branch:-"main"}
|
opea_branch=${opea_branch:-"main"}
|
||||||
|
|||||||
@@ -2,7 +2,7 @@
|
|||||||
# Copyright (C) 2024 Intel Corporation
|
# Copyright (C) 2024 Intel Corporation
|
||||||
# SPDX-License-Identifier: Apache-2.0
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
set -e
|
set -xe
|
||||||
IMAGE_REPO=${IMAGE_REPO:-"opea"}
|
IMAGE_REPO=${IMAGE_REPO:-"opea"}
|
||||||
IMAGE_TAG=${IMAGE_TAG:-"latest"}
|
IMAGE_TAG=${IMAGE_TAG:-"latest"}
|
||||||
echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}"
|
echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}"
|
||||||
@@ -24,8 +24,8 @@ function build_docker_images() {
|
|||||||
docker build --no-cache -t ${REGISTRY}/comps-base:${TAG} --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile .
|
docker build --no-cache -t ${REGISTRY}/comps-base:${TAG} --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile .
|
||||||
popd && sleep 1s
|
popd && sleep 1s
|
||||||
git clone https://github.com/HabanaAI/vllm-fork.git && cd vllm-fork
|
git clone https://github.com/HabanaAI/vllm-fork.git && cd vllm-fork
|
||||||
VLLM_VER=v0.6.6.post1+Gaudi-1.20.0
|
VLLM_FORK_VER=v0.6.6.post1+Gaudi-1.20.0
|
||||||
git checkout ${VLLM_VER} &> /dev/null && cd ../
|
git checkout ${VLLM_FORK_VER} &> /dev/null && cd ../
|
||||||
|
|
||||||
echo "Build all the images with --no-cache, check docker_image_build.log for details..."
|
echo "Build all the images with --no-cache, check docker_image_build.log for details..."
|
||||||
service_list="chatqna chatqna-ui dataprep retriever vllm-gaudi nginx"
|
service_list="chatqna chatqna-ui dataprep retriever vllm-gaudi nginx"
|
||||||
@@ -36,11 +36,8 @@ function build_docker_images() {
|
|||||||
|
|
||||||
function start_services() {
|
function start_services() {
|
||||||
cd $WORKPATH/docker_compose/intel/hpu/gaudi
|
cd $WORKPATH/docker_compose/intel/hpu/gaudi
|
||||||
export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
|
export NON_INTERACTIVE=true
|
||||||
export LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct"
|
source set_env.sh
|
||||||
export NUM_CARDS=1
|
|
||||||
export INDEX_NAME="rag-redis"
|
|
||||||
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
|
|
||||||
|
|
||||||
# Start Docker Containers
|
# Start Docker Containers
|
||||||
docker compose -f compose_without_rerank.yaml up -d > ${LOG_PATH}/start_services_with_compose.log
|
docker compose -f compose_without_rerank.yaml up -d > ${LOG_PATH}/start_services_with_compose.log
|
||||||
|
|||||||
@@ -2,7 +2,7 @@
|
|||||||
# Copyright (C) 2024 Intel Corporation
|
# Copyright (C) 2024 Intel Corporation
|
||||||
# SPDX-License-Identifier: Apache-2.0
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
set -e
|
set -xe
|
||||||
IMAGE_REPO=${IMAGE_REPO:-"opea"}
|
IMAGE_REPO=${IMAGE_REPO:-"opea"}
|
||||||
IMAGE_TAG=${IMAGE_TAG:-"latest"}
|
IMAGE_TAG=${IMAGE_TAG:-"latest"}
|
||||||
echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}"
|
echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}"
|
||||||
@@ -41,10 +41,7 @@ function build_docker_images() {
|
|||||||
function start_services() {
|
function start_services() {
|
||||||
cd $WORKPATH/docker_compose/intel/cpu/xeon
|
cd $WORKPATH/docker_compose/intel/cpu/xeon
|
||||||
|
|
||||||
export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
|
source set_env.sh
|
||||||
export LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct"
|
|
||||||
export INDEX_NAME="rag-redis"
|
|
||||||
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
|
|
||||||
|
|
||||||
# Start Docker Containers
|
# Start Docker Containers
|
||||||
docker compose -f compose_without_rerank.yaml up -d > ${LOG_PATH}/start_services_with_compose.log
|
docker compose -f compose_without_rerank.yaml up -d > ${LOG_PATH}/start_services_with_compose.log
|
||||||
|
|||||||
@@ -1,8 +1,9 @@
|
|||||||
# Copyright (C) 2024 Intel Corporation
|
# Copyright (C) 2024 Intel Corporation
|
||||||
# SPDX-License-Identifier: Apache-2.0
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
|
ARG IMAGE_REPO=opea
|
||||||
ARG BASE_TAG=latest
|
ARG BASE_TAG=latest
|
||||||
FROM opea/comps-base:$BASE_TAG
|
FROM $IMAGE_REPO/comps-base:$BASE_TAG
|
||||||
|
|
||||||
COPY ./codegen.py $HOME/codegen.py
|
COPY ./codegen.py $HOME/codegen.py
|
||||||
|
|
||||||
|
|||||||
@@ -52,18 +52,29 @@ This uses the default vLLM-based deployment profile (`codegen-xeon-vllm`).
|
|||||||
|
|
||||||
```bash
|
```bash
|
||||||
# Replace with your host's external IP address (do not use localhost or 127.0.0.1)
|
# Replace with your host's external IP address (do not use localhost or 127.0.0.1)
|
||||||
export HOST_IP="your_external_ip_address"
|
export host_ip="your_external_ip_address"
|
||||||
# Replace with your Hugging Face Hub API token
|
# Replace with your Hugging Face Hub API token
|
||||||
export HUGGINGFACEHUB_API_TOKEN="your_huggingface_token"
|
export HUGGINGFACEHUB_API_TOKEN="your_huggingface_token"
|
||||||
|
|
||||||
# Optional: Configure proxy if needed
|
# Optional: Configure proxy if needed
|
||||||
# export http_proxy="your_http_proxy"
|
# export http_proxy="your_http_proxy"
|
||||||
# export https_proxy="your_https_proxy"
|
# export https_proxy="your_https_proxy"
|
||||||
# export no_proxy="localhost,127.0.0.1,${HOST_IP}" # Add other hosts if necessary
|
# export no_proxy="localhost,127.0.0.1,${host_ip}" # Add other hosts if necessary
|
||||||
source ../../../set_env.sh
|
source ../../../set_env.sh
|
||||||
```
|
```
|
||||||
|
|
||||||
_Note: The compose file might read additional variables from a `.env` file or expect them defined elsewhere. Ensure all required variables like ports (`LLM_SERVICE_PORT`, `MEGA_SERVICE_PORT`, etc.) are set if not using defaults from the compose file._
|
_Note: The compose file might read additional variables from set_env.sh. Ensure all required variables like ports (`LLM_SERVICE_PORT`, `MEGA_SERVICE_PORT`, etc.) are set if not using defaults from the compose file._
|
||||||
|
like
|
||||||
|
|
||||||
|
```
|
||||||
|
export LLM_MODEL_ID="Qwen/Qwen2.5-Coder-32B-Instruct"
|
||||||
|
```
|
||||||
|
|
||||||
|
can be changed to small model if needed
|
||||||
|
|
||||||
|
```
|
||||||
|
export LLM_MODEL_ID="Qwen/Qwen2.5-Coder-7B-Instruct"
|
||||||
|
```
|
||||||
|
|
||||||
2. **Start Services (vLLM Profile):**
|
2. **Start Services (vLLM Profile):**
|
||||||
|
|
||||||
@@ -91,7 +102,7 @@ The `compose.yaml` file uses Docker Compose profiles to select the LLM serving b
|
|||||||
- **Services Deployed:** `codegen-tgi-server`, `codegen-llm-server`, `codegen-tei-embedding-server`, `codegen-retriever-server`, `redis-vector-db`, `codegen-dataprep-server`, `codegen-backend-server`, `codegen-gradio-ui-server`.
|
- **Services Deployed:** `codegen-tgi-server`, `codegen-llm-server`, `codegen-tei-embedding-server`, `codegen-retriever-server`, `redis-vector-db`, `codegen-dataprep-server`, `codegen-backend-server`, `codegen-gradio-ui-server`.
|
||||||
- **To Run:**
|
- **To Run:**
|
||||||
```bash
|
```bash
|
||||||
# Ensure environment variables (HOST_IP, HUGGINGFACEHUB_API_TOKEN) are set
|
# Ensure environment variables (host_ip, HUGGINGFACEHUB_API_TOKEN) are set
|
||||||
docker compose --profile codegen-xeon-tgi up -d
|
docker compose --profile codegen-xeon-tgi up -d
|
||||||
```
|
```
|
||||||
|
|
||||||
@@ -103,14 +114,14 @@ Key parameters are configured via environment variables set before running `dock
|
|||||||
|
|
||||||
| Environment Variable | Description | Default (Set Externally) |
|
| Environment Variable | Description | Default (Set Externally) |
|
||||||
| :-------------------------------------- | :------------------------------------------------------------------------------------------------------------------ | :----------------------------------------------------------------------------------------------- |
|
| :-------------------------------------- | :------------------------------------------------------------------------------------------------------------------ | :----------------------------------------------------------------------------------------------- |
|
||||||
| `HOST_IP` | External IP address of the host machine. **Required.** | `your_external_ip_address` |
|
| `host_ip` | External IP address of the host machine. **Required.** | `your_external_ip_address` |
|
||||||
| `HUGGINGFACEHUB_API_TOKEN` | Your Hugging Face Hub token for model access. **Required.** | `your_huggingface_token` |
|
| `HUGGINGFACEHUB_API_TOKEN` | Your Hugging Face Hub token for model access. **Required.** | `your_huggingface_token` |
|
||||||
| `LLM_MODEL_ID` | Hugging Face model ID for the CodeGen LLM (used by TGI/vLLM service). Configured within `compose.yaml` environment. | `Qwen/Qwen2.5-Coder-7B-Instruct` |
|
| `LLM_MODEL_ID` | Hugging Face model ID for the CodeGen LLM (used by TGI/vLLM service). Configured within `compose.yaml` environment. | `Qwen/Qwen2.5-Coder-7B-Instruct` |
|
||||||
| `EMBEDDING_MODEL_ID` | Hugging Face model ID for the embedding model (used by TEI service). Configured within `compose.yaml` environment. | `BAAI/bge-base-en-v1.5` |
|
| `EMBEDDING_MODEL_ID` | Hugging Face model ID for the embedding model (used by TEI service). Configured within `compose.yaml` environment. | `BAAI/bge-base-en-v1.5` |
|
||||||
| `LLM_ENDPOINT` | Internal URL for the LLM serving endpoint (used by `codegen-llm-server`). Configured in `compose.yaml`. | `http://codegen-tgi-server:80/generate` or `http://codegen-vllm-server:8000/v1/chat/completions` |
|
| `LLM_ENDPOINT` | Internal URL for the LLM serving endpoint (used by `codegen-llm-server`). Configured in `compose.yaml`. | `http://codegen-tgi-server:80/generate` or `http://codegen-vllm-server:8000/v1/chat/completions` |
|
||||||
| `TEI_EMBEDDING_ENDPOINT` | Internal URL for the Embedding service. Configured in `compose.yaml`. | `http://codegen-tei-embedding-server:80/embed` |
|
| `TEI_EMBEDDING_ENDPOINT` | Internal URL for the Embedding service. Configured in `compose.yaml`. | `http://codegen-tei-embedding-server:80/embed` |
|
||||||
| `DATAPREP_ENDPOINT` | Internal URL for the Data Preparation service. Configured in `compose.yaml`. | `http://codegen-dataprep-server:80/dataprep` |
|
| `DATAPREP_ENDPOINT` | Internal URL for the Data Preparation service. Configured in `compose.yaml`. | `http://codegen-dataprep-server:80/dataprep` |
|
||||||
| `BACKEND_SERVICE_ENDPOINT` | External URL for the CodeGen Gateway (MegaService). Derived from `HOST_IP` and port `7778`. | `http://${HOST_IP}:7778/v1/codegen` |
|
| `BACKEND_SERVICE_ENDPOINT` | External URL for the CodeGen Gateway (MegaService). Derived from `host_ip` and port `7778`. | `http://${host_ip}:7778/v1/codegen` |
|
||||||
| `*_PORT` (Internal) | Internal container ports (e.g., `80`, `6379`). Defined in `compose.yaml`. | N/A |
|
| `*_PORT` (Internal) | Internal container ports (e.g., `80`, `6379`). Defined in `compose.yaml`. | N/A |
|
||||||
| `http_proxy` / `https_proxy`/`no_proxy` | Network proxy settings (if required). | `""` |
|
| `http_proxy` / `https_proxy`/`no_proxy` | Network proxy settings (if required). | `""` |
|
||||||
|
|
||||||
@@ -150,23 +161,23 @@ Check logs for specific services: `docker compose logs <service_name>`
|
|||||||
|
|
||||||
### Run Validation Script/Commands
|
### Run Validation Script/Commands
|
||||||
|
|
||||||
Use `curl` commands to test the main service endpoints. Ensure `HOST_IP` is correctly set in your environment.
|
Use `curl` commands to test the main service endpoints. Ensure `host_ip` is correctly set in your environment.
|
||||||
|
|
||||||
1. **Validate LLM Serving Endpoint (Example for vLLM on default port 8000 internally, exposed differently):**
|
1. **Validate LLM Serving Endpoint (Example for vLLM on default port 9000 internally, exposed differently):**
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
# This command structure targets the OpenAI-compatible vLLM endpoint
|
# This command structure targets the OpenAI-compatible vLLM endpoint
|
||||||
curl http://${HOST_IP}:8000/v1/chat/completions \
|
curl http://${host_ip}:9000/v1/chat/completions \
|
||||||
-X POST \
|
-X POST \
|
||||||
-H 'Content-Type: application/json' \
|
-H 'Content-Type: application/json' \
|
||||||
-d '{"model": "Qwen/Qwen2.5-Coder-7B-Instruct", "messages": [{"role": "user", "content": "Implement a basic Python class"}], "max_tokens":32}'
|
-d '{"model": "Qwen/Qwen2.5-Coder-32B-Instruct", "messages": [{"role": "user", "content": "Implement a basic Python class"}], "max_tokens":32}'
|
||||||
```
|
```
|
||||||
|
|
||||||
- **Expected Output:** A JSON response with generated code in `choices[0].message.content`.
|
- **Expected Output:** A JSON response with generated code in `choices[0].message.content`.
|
||||||
|
|
||||||
2. **Validate CodeGen Gateway (MegaService on default port 7778):**
|
2. **Validate CodeGen Gateway (MegaService on default port 7778):**
|
||||||
```bash
|
```bash
|
||||||
curl http://${HOST_IP}:7778/v1/codegen \
|
curl http://${host_ip}:7778/v1/codegen \
|
||||||
-H "Content-Type: application/json" \
|
-H "Content-Type: application/json" \
|
||||||
-d '{"messages": "Write a Python function that adds two numbers."}'
|
-d '{"messages": "Write a Python function that adds two numbers."}'
|
||||||
```
|
```
|
||||||
@@ -179,8 +190,8 @@ Multiple UI options can be configured via the `compose.yaml`.
|
|||||||
### Gradio UI (Default)
|
### Gradio UI (Default)
|
||||||
|
|
||||||
Access the default Gradio UI by navigating to:
|
Access the default Gradio UI by navigating to:
|
||||||
`http://{HOST_IP}:8080`
|
`http://{host_ip}:5173`
|
||||||
_(Port `8080` is the default host mapping for `codegen-gradio-ui-server`)_
|
_(Port `5173` is the default host mapping for `codegen-gradio-ui-server`)_
|
||||||
|
|
||||||

|

|
||||||

|

|
||||||
@@ -189,7 +200,7 @@ _(Port `8080` is the default host mapping for `codegen-gradio-ui-server`)_
|
|||||||
|
|
||||||
1. Modify `compose.yaml`: Comment out the `codegen-gradio-ui-server` service and uncomment/add the `codegen-xeon-ui-server` (Svelte) service definition, ensuring the port mapping is correct (e.g., `"- 5173:5173"`).
|
1. Modify `compose.yaml`: Comment out the `codegen-gradio-ui-server` service and uncomment/add the `codegen-xeon-ui-server` (Svelte) service definition, ensuring the port mapping is correct (e.g., `"- 5173:5173"`).
|
||||||
2. Restart Docker Compose: `docker compose --profile <profile_name> up -d`
|
2. Restart Docker Compose: `docker compose --profile <profile_name> up -d`
|
||||||
3. Access: `http://{HOST_IP}:5173` (or the host port you mapped).
|
3. Access: `http://{host_ip}:5173` (or the host port you mapped).
|
||||||
|
|
||||||

|

|
||||||
|
|
||||||
@@ -197,7 +208,7 @@ _(Port `8080` is the default host mapping for `codegen-gradio-ui-server`)_
|
|||||||
|
|
||||||
1. Modify `compose.yaml`: Comment out the default UI service and uncomment/add the `codegen-xeon-react-ui-server` definition, ensuring correct port mapping (e.g., `"- 5174:80"`).
|
1. Modify `compose.yaml`: Comment out the default UI service and uncomment/add the `codegen-xeon-react-ui-server` definition, ensuring correct port mapping (e.g., `"- 5174:80"`).
|
||||||
2. Restart Docker Compose: `docker compose --profile <profile_name> up -d`
|
2. Restart Docker Compose: `docker compose --profile <profile_name> up -d`
|
||||||
3. Access: `http://{HOST_IP}:5174` (or the host port you mapped).
|
3. Access: `http://{host_ip}:5174` (or the host port you mapped).
|
||||||
|
|
||||||

|

|
||||||
|
|
||||||
@@ -207,7 +218,7 @@ Users can interact with the backend service using the `Neural Copilot` VS Code e
|
|||||||
|
|
||||||
1. **Install:** Find and install `Neural Copilot` from the VS Code Marketplace.
|
1. **Install:** Find and install `Neural Copilot` from the VS Code Marketplace.
|
||||||

|

|
||||||
2. **Configure:** Set the "Service URL" in the extension settings to your CodeGen backend endpoint: `http://${HOST_IP}:7778/v1/codegen` (use the correct port if changed).
|
2. **Configure:** Set the "Service URL" in the extension settings to your CodeGen backend endpoint: `http://${host_ip}:7778/v1/codegen` (use the correct port if changed).
|
||||||

|

|
||||||
3. **Usage:**
|
3. **Usage:**
|
||||||
- **Inline Suggestion:** Type a comment describing the code you want (e.g., `# Python function to read a file`) and wait for suggestions.
|
- **Inline Suggestion:** Type a comment describing the code you want (e.g., `# Python function to read a file`) and wait for suggestions.
|
||||||
@@ -218,7 +229,7 @@ Users can interact with the backend service using the `Neural Copilot` VS Code e
|
|||||||
## Troubleshooting
|
## Troubleshooting
|
||||||
|
|
||||||
- **Model Download Issues:** Check `HUGGINGFACEHUB_API_TOKEN`. Ensure internet connectivity or correct proxy settings. Check logs of `tgi-service`/`vllm-service` and `tei-embedding-server`. Gated models need prior Hugging Face access.
|
- **Model Download Issues:** Check `HUGGINGFACEHUB_API_TOKEN`. Ensure internet connectivity or correct proxy settings. Check logs of `tgi-service`/`vllm-service` and `tei-embedding-server`. Gated models need prior Hugging Face access.
|
||||||
- **Connection Errors:** Verify `HOST_IP` is correct and accessible. Check `docker ps` for port mappings. Ensure `no_proxy` includes `HOST_IP` if using a proxy. Check logs of the service failing to connect (e.g., `codegen-backend-server` logs if it can't reach `codegen-llm-server`).
|
- **Connection Errors:** Verify `host_ip` is correct and accessible. Check `docker ps` for port mappings. Ensure `no_proxy` includes `host_ip` if using a proxy. Check logs of the service failing to connect (e.g., `codegen-backend-server` logs if it can't reach `codegen-llm-server`).
|
||||||
- **"Container name is in use"**: Stop existing containers (`docker compose down`) or change `container_name` in `compose.yaml`.
|
- **"Container name is in use"**: Stop existing containers (`docker compose down`) or change `container_name` in `compose.yaml`.
|
||||||
- **Resource Issues:** CodeGen models can be memory-intensive. Monitor host RAM usage. Increase Docker resources if needed.
|
- **Resource Issues:** CodeGen models can be memory-intensive. Monitor host RAM usage. Increase Docker resources if needed.
|
||||||
|
|
||||||
|
|||||||
@@ -53,18 +53,29 @@ This uses the default vLLM-based deployment profile (`codegen-gaudi-vllm`).
|
|||||||
|
|
||||||
```bash
|
```bash
|
||||||
# Replace with your host's external IP address (do not use localhost or 127.0.0.1)
|
# Replace with your host's external IP address (do not use localhost or 127.0.0.1)
|
||||||
export HOST_IP="your_external_ip_address"
|
export host_ip="your_external_ip_address"
|
||||||
# Replace with your Hugging Face Hub API token
|
# Replace with your Hugging Face Hub API token
|
||||||
export HUGGINGFACEHUB_API_TOKEN="your_huggingface_token"
|
export HUGGINGFACEHUB_API_TOKEN="your_huggingface_token"
|
||||||
|
|
||||||
# Optional: Configure proxy if needed
|
# Optional: Configure proxy if needed
|
||||||
# export http_proxy="your_http_proxy"
|
# export http_proxy="your_http_proxy"
|
||||||
# export https_proxy="your_https_proxy"
|
# export https_proxy="your_https_proxy"
|
||||||
# export no_proxy="localhost,127.0.0.1,${HOST_IP}" # Add other hosts if necessary
|
# export no_proxy="localhost,127.0.0.1,${host_ip}" # Add other hosts if necessary
|
||||||
source ../../../set_env.sh
|
source ../../../set_env.sh
|
||||||
```
|
```
|
||||||
|
|
||||||
_Note: Ensure all required variables like ports (`LLM_SERVICE_PORT`, `MEGA_SERVICE_PORT`, etc.) are set if not using defaults from the compose file._
|
_Note: The compose file might read additional variables from `set_env.sh`. Ensure all required variables, such as the ports (`LLM_SERVICE_PORT`, `MEGA_SERVICE_PORT`, etc.), are set if not using defaults from the compose file._
|
||||||
|
For example, the default model is set via:
|
||||||
|
|
||||||
|
```
|
||||||
|
export LLM_MODEL_ID="Qwen/Qwen2.5-Coder-32B-Instruct"
|
||||||
|
```
|
||||||
|
|
||||||
|
which can be changed to a smaller model if needed:
|
||||||
|
|
||||||
|
```
|
||||||
|
export LLM_MODEL_ID="Qwen/Qwen2.5-Coder-7B-Instruct"
|
||||||
|
```
|
||||||
|
|
||||||
2. **Start Services (vLLM Profile):**
|
2. **Start Services (vLLM Profile):**
|
||||||
|
|
||||||
@@ -94,7 +105,7 @@ The `compose.yaml` file uses Docker Compose profiles to select the LLM serving b
|
|||||||
- **Other Services:** Same CPU-based services as the vLLM profile.
|
- **Other Services:** Same CPU-based services as the vLLM profile.
|
||||||
- **To Run:**
|
- **To Run:**
|
||||||
```bash
|
```bash
|
||||||
# Ensure environment variables (HOST_IP, HUGGINGFACEHUB_API_TOKEN) are set
|
# Ensure environment variables (host_ip, HUGGINGFACEHUB_API_TOKEN) are set
|
||||||
docker compose --profile codegen-gaudi-tgi up -d
|
docker compose --profile codegen-gaudi-tgi up -d
|
||||||
```
|
```
|
||||||
|
|
||||||
@@ -106,14 +117,14 @@ Key parameters are configured via environment variables set before running `dock
|
|||||||
|
|
||||||
| Environment Variable | Description | Default (Set Externally) |
|
| Environment Variable | Description | Default (Set Externally) |
|
||||||
| :-------------------------------------- | :------------------------------------------------------------------------------------------------------------------ | :----------------------------------------------------------------------------------------------- |
|
| :-------------------------------------- | :------------------------------------------------------------------------------------------------------------------ | :----------------------------------------------------------------------------------------------- |
|
||||||
| `HOST_IP` | External IP address of the host machine. **Required.** | `your_external_ip_address` |
|
| `host_ip` | External IP address of the host machine. **Required.** | `your_external_ip_address` |
|
||||||
| `HUGGINGFACEHUB_API_TOKEN` | Your Hugging Face Hub token for model access. **Required.** | `your_huggingface_token` |
|
| `HUGGINGFACEHUB_API_TOKEN` | Your Hugging Face Hub token for model access. **Required.** | `your_huggingface_token` |
|
||||||
| `LLM_MODEL_ID` | Hugging Face model ID for the CodeGen LLM (used by TGI/vLLM service). Configured within `compose.yaml` environment. | `Qwen/Qwen2.5-Coder-7B-Instruct` |
|
| `LLM_MODEL_ID` | Hugging Face model ID for the CodeGen LLM (used by TGI/vLLM service). Configured within `compose.yaml` environment. | `Qwen/Qwen2.5-Coder-32B-Instruct` |
|
||||||
| `EMBEDDING_MODEL_ID` | Hugging Face model ID for the embedding model (used by TEI service). Configured within `compose.yaml` environment. | `BAAI/bge-base-en-v1.5` |
|
| `EMBEDDING_MODEL_ID` | Hugging Face model ID for the embedding model (used by TEI service). Configured within `compose.yaml` environment. | `BAAI/bge-base-en-v1.5` |
|
||||||
| `LLM_ENDPOINT` | Internal URL for the LLM serving endpoint (used by `codegen-llm-server`). Configured in `compose.yaml`. | `http://codegen-tgi-server:80/generate` or `http://codegen-vllm-server:8000/v1/chat/completions` |
|
| `LLM_ENDPOINT` | Internal URL for the LLM serving endpoint (used by `codegen-llm-server`). Configured in `compose.yaml`. | `http://codegen-tgi-server:80/generate` or `http://codegen-vllm-server:8000/v1/chat/completions` |
|
||||||
| `TEI_EMBEDDING_ENDPOINT` | Internal URL for the Embedding service. Configured in `compose.yaml`. | `http://codegen-tei-embedding-server:80/embed` |
|
| `TEI_EMBEDDING_ENDPOINT` | Internal URL for the Embedding service. Configured in `compose.yaml`. | `http://codegen-tei-embedding-server:80/embed` |
|
||||||
| `DATAPREP_ENDPOINT` | Internal URL for the Data Preparation service. Configured in `compose.yaml`. | `http://codegen-dataprep-server:80/dataprep` |
|
| `DATAPREP_ENDPOINT` | Internal URL for the Data Preparation service. Configured in `compose.yaml`. | `http://codegen-dataprep-server:80/dataprep` |
|
||||||
| `BACKEND_SERVICE_ENDPOINT` | External URL for the CodeGen Gateway (MegaService). Derived from `HOST_IP` and port `7778`. | `http://${HOST_IP}:7778/v1/codegen` |
|
| `BACKEND_SERVICE_ENDPOINT` | External URL for the CodeGen Gateway (MegaService). Derived from `host_ip` and port `7778`. | `http://${host_ip}:7778/v1/codegen` |
|
||||||
| `*_PORT` (Internal) | Internal container ports (e.g., `80`, `6379`). Defined in `compose.yaml`. | N/A |
|
| `*_PORT` (Internal) | Internal container ports (e.g., `80`, `6379`). Defined in `compose.yaml`. | N/A |
|
||||||
| `http_proxy` / `https_proxy`/`no_proxy` | Network proxy settings (if required). | `""` |
|
| `http_proxy` / `https_proxy`/`no_proxy` | Network proxy settings (if required). | `""` |
|
||||||
|
|
||||||
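A minimal sketch of overriding selected values from the table before starting the stack, assuming the compose file reads them from the environment (the IP and token are placeholders; the profile name comes from the vLLM deployment described above):

```bash
export host_ip="192.168.1.10"                         # placeholder; use your host's external IP
export HUGGINGFACEHUB_API_TOKEN="hf_xxx"              # placeholder token
export LLM_MODEL_ID="Qwen/Qwen2.5-Coder-32B-Instruct"
docker compose --profile codegen-gaudi-vllm up -d
```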
@@ -170,21 +181,21 @@ Check logs: `docker compose logs <service_name>`. Pay attention to `vllm-gaudi-s
|
|||||||
|
|
||||||
### Run Validation Script/Commands
|
### Run Validation Script/Commands
|
||||||
|
|
||||||
Use `curl` commands targeting the main service endpoints. Ensure `HOST_IP` is correctly set.
|
Use `curl` commands targeting the main service endpoints. Ensure `host_ip` is correctly set.
|
||||||
|
|
||||||
1. **Validate LLM Serving Endpoint (Example for vLLM on default port 8000 internally, exposed differently):**
|
1. **Validate LLM Serving Endpoint (Example for vLLM on default port 9000 internally, exposed differently):**
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
# This command structure targets the OpenAI-compatible vLLM endpoint
|
# This command structure targets the OpenAI-compatible vLLM endpoint
|
||||||
curl http://${HOST_IP}:8000/v1/chat/completions \
|
curl http://${host_ip}:9000/v1/chat/completions \
|
||||||
-X POST \
|
-X POST \
|
||||||
-H 'Content-Type: application/json' \
|
-H 'Content-Type: application/json' \
|
||||||
-d '{"model": "Qwen/Qwen2.5-Coder-7B-Instruct", "messages": [{"role": "user", "content": "Implement a basic Python class"}], "max_tokens":32}'
|
-d '{"model": "Qwen/Qwen2.5-Coder-32B-Instruct", "messages": [{"role": "user", "content": "Implement a basic Python class"}], "max_tokens":32}'
|
||||||
```
|
```
|
||||||
|
|
||||||
2. **Validate CodeGen Gateway (MegaService, default host port 7778):**
|
2. **Validate CodeGen Gateway (MegaService, default host port 7778):**
|
||||||
```bash
|
```bash
|
||||||
curl http://${HOST_IP}:7778/v1/codegen \
|
curl http://${host_ip}:7778/v1/codegen \
|
||||||
-H "Content-Type: application/json" \
|
-H "Content-Type: application/json" \
|
||||||
-d '{"messages": "Implement a sorting algorithm in Python."}'
|
-d '{"messages": "Implement a sorting algorithm in Python."}'
|
||||||
```
|
```
|
||||||
@@ -197,8 +208,8 @@ UI options are similar to the Xeon deployment.
|
|||||||
### Gradio UI (Default)
|
### Gradio UI (Default)
|
||||||
|
|
||||||
Access the default Gradio UI:
|
Access the default Gradio UI:
|
||||||
`http://{HOST_IP}:8080`
|
`http://{host_ip}:5173`
|
||||||
_(Port `8080` is the default host mapping)_
|
_(Port `5173` is the default host mapping)_
|
||||||
|
|
||||||

|

|
||||||
|
|
||||||
@@ -206,17 +217,17 @@ _(Port `8080` is the default host mapping)_
|
|||||||
|
|
||||||
1. Modify `compose.yaml`: Swap Gradio service for Svelte (`codegen-gaudi-ui-server`), check port map (e.g., `5173:5173`).
|
1. Modify `compose.yaml`: Swap Gradio service for Svelte (`codegen-gaudi-ui-server`), check port map (e.g., `5173:5173`).
|
||||||
2. Restart: `docker compose --profile <profile_name> up -d`
|
2. Restart: `docker compose --profile <profile_name> up -d`
|
||||||
3. Access: `http://{HOST_IP}:5173`
|
3. Access: `http://{host_ip}:5173`
|
||||||
|
|
||||||
### React UI (Optional)
|
### React UI (Optional)
|
||||||
|
|
||||||
1. Modify `compose.yaml`: Swap Gradio service for React (`codegen-gaudi-react-ui-server`), check port map (e.g., `5174:80`).
|
1. Modify `compose.yaml`: Swap Gradio service for React (`codegen-gaudi-react-ui-server`), check port map (e.g., `5174:80`).
|
||||||
2. Restart: `docker compose --profile <profile_name> up -d`
|
2. Restart: `docker compose --profile <profile_name> up -d`
|
||||||
3. Access: `http://{HOST_IP}:5174`
|
3. Access: `http://{host_ip}:5174`
|
||||||
|
|
||||||
### VS Code Extension (Optional)
|
### VS Code Extension (Optional)
|
||||||
|
|
||||||
Use the `Neural Copilot` extension configured with the CodeGen backend URL: `http://${HOST_IP}:7778/v1/codegen`. (See Xeon README for detailed setup screenshots).
|
Use the `Neural Copilot` extension configured with the CodeGen backend URL: `http://${host_ip}:7778/v1/codegen`. (See Xeon README for detailed setup screenshots).
|
||||||
|
|
||||||
## Troubleshooting
|
## Troubleshooting
|
||||||
|
|
||||||
@@ -226,7 +237,7 @@ Use the `Neural Copilot` extension configured with the CodeGen backend URL: `htt
|
|||||||
- Verify `runtime: habana` and volume mounts in `compose.yaml`.
|
- Verify `runtime: habana` and volume mounts in `compose.yaml`.
|
||||||
- Gaudi initialization can take significant time and memory. Monitor resource usage.
|
- Gaudi initialization can take significant time and memory. Monitor resource usage.
|
||||||
- **Model Download Issues:** Check `HUGGINGFACEHUB_API_TOKEN`, internet access, proxy settings. Check LLM service logs.
|
- **Model Download Issues:** Check `HUGGINGFACEHUB_API_TOKEN`, internet access, proxy settings. Check LLM service logs.
|
||||||
- **Connection Errors:** Verify `HOST_IP`, ports, and proxy settings. Use `docker ps` and check service logs.
|
- **Connection Errors:** Verify `host_ip`, ports, and proxy settings. Use `docker ps` and check service logs.
|
||||||
|
|
||||||
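A few illustrative checks for the points above; `hl-smi` ships with the Habana driver stack, and the container name is an assumption taken from this guide:

```bash
hl-smi                                               # verify Gaudi devices are visible on the host
docker compose config | grep -n "habana"             # confirm runtime: habana appears in the rendered config
docker compose logs vllm-gaudi-server --tail 200     # adjust the service/container name to your compose.yaml
```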
## Stopping the Application
|
## Stopping the Application
|
||||||
|
|
||||||
|
|||||||
@@ -5,6 +5,8 @@ services:
|
|||||||
codegen:
|
codegen:
|
||||||
build:
|
build:
|
||||||
args:
|
args:
|
||||||
|
IMAGE_REPO: ${REGISTRY}
|
||||||
|
BASE_TAG: ${TAG}
|
||||||
http_proxy: ${http_proxy}
|
http_proxy: ${http_proxy}
|
||||||
https_proxy: ${https_proxy}
|
https_proxy: ${https_proxy}
|
||||||
no_proxy: ${no_proxy}
|
no_proxy: ${no_proxy}
|
||||||
@@ -39,6 +41,7 @@ services:
|
|||||||
build:
|
build:
|
||||||
context: GenAIComps
|
context: GenAIComps
|
||||||
dockerfile: comps/third_parties/vllm/src/Dockerfile.amd_gpu
|
dockerfile: comps/third_parties/vllm/src/Dockerfile.amd_gpu
|
||||||
|
extends: codegen
|
||||||
image: ${REGISTRY:-opea}/vllm-rocm:${TAG:-latest}
|
image: ${REGISTRY:-opea}/vllm-rocm:${TAG:-latest}
|
||||||
vllm:
|
vllm:
|
||||||
build:
|
build:
|
||||||
|
|||||||
@@ -27,25 +27,19 @@ export no_proxy=${no_proxy},${ip_address}
|
|||||||
|
|
||||||
function build_docker_images() {
|
function build_docker_images() {
|
||||||
opea_branch=${opea_branch:-"main"}
|
opea_branch=${opea_branch:-"main"}
|
||||||
# If the opea_branch isn't main, replace the git clone branch in Dockerfile.
|
|
||||||
if [[ "${opea_branch}" != "main" ]]; then
|
|
||||||
cd $WORKPATH
|
|
||||||
OLD_STRING="RUN git clone --depth 1 https://github.com/opea-project/GenAIComps.git"
|
|
||||||
NEW_STRING="RUN git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git"
|
|
||||||
find . -type f -name "Dockerfile*" | while read -r file; do
|
|
||||||
echo "Processing file: $file"
|
|
||||||
sed -i "s|$OLD_STRING|$NEW_STRING|g" "$file"
|
|
||||||
done
|
|
||||||
fi
|
|
||||||
|
|
||||||
cd $WORKPATH/docker_image_build
|
cd $WORKPATH/docker_image_build
|
||||||
git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git
|
git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git
|
||||||
|
pushd GenAIComps
|
||||||
|
echo "GenAIComps test commit is $(git rev-parse HEAD)"
|
||||||
|
docker build --no-cache -t ${REGISTRY}/comps-base:${TAG} --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile .
|
||||||
|
popd && sleep 1s
|
||||||
|
|
||||||
# Download Gaudi vllm of latest tag
|
# Download Gaudi vllm of latest tag
|
||||||
git clone https://github.com/HabanaAI/vllm-fork.git && cd vllm-fork
|
git clone https://github.com/HabanaAI/vllm-fork.git && cd vllm-fork
|
||||||
VLLM_VER=v0.6.6.post1+Gaudi-1.20.0
|
VLLM_FORK_VER=v0.6.6.post1+Gaudi-1.20.0
|
||||||
echo "Check out vLLM tag ${VLLM_VER}"
|
echo "Check out vLLM tag ${VLLM_FORK_VER}"
|
||||||
git checkout ${VLLM_VER} &> /dev/null && cd ../
|
git checkout ${VLLM_FORK_VER} &> /dev/null && cd ../
|
||||||
|
|
||||||
echo "Build all the images with --no-cache, check docker_image_build.log for details..."
|
echo "Build all the images with --no-cache, check docker_image_build.log for details..."
|
||||||
service_list="codegen codegen-gradio-ui llm-textgen vllm-gaudi dataprep retriever embedding"
|
service_list="codegen codegen-gradio-ui llm-textgen vllm-gaudi dataprep retriever embedding"
|
||||||
@@ -250,24 +244,36 @@ function main() {
|
|||||||
stop_docker "${docker_compose_profiles[${i}]}"
|
stop_docker "${docker_compose_profiles[${i}]}"
|
||||||
done
|
done
|
||||||
|
|
||||||
# build docker images
|
echo "::group::build_docker_images"
|
||||||
if [[ "$IMAGE_REPO" == "opea" ]]; then build_docker_images; fi
|
if [[ "$IMAGE_REPO" == "opea" ]]; then build_docker_images; fi
|
||||||
|
echo "::endgroup::"
|
||||||
|
|
||||||
# loop all profiles
|
# loop all profiles
|
||||||
for ((i = 0; i < len_profiles; i++)); do
|
for ((i = 0; i < len_profiles; i++)); do
|
||||||
echo "Process [${i}]: ${docker_compose_profiles[$i]}, ${docker_llm_container_names[${i}]}"
|
echo "Process [${i}]: ${docker_compose_profiles[$i]}, ${docker_llm_container_names[${i}]}"
|
||||||
|
|
||||||
|
echo "::group::start_services"
|
||||||
start_services "${docker_compose_profiles[${i}]}" "${docker_llm_container_names[${i}]}"
|
start_services "${docker_compose_profiles[${i}]}" "${docker_llm_container_names[${i}]}"
|
||||||
|
echo "::endgroup::"
|
||||||
docker ps -a
|
docker ps -a
|
||||||
|
|
||||||
|
echo "::group::validate_microservices"
|
||||||
validate_microservices "${docker_llm_container_names[${i}]}"
|
validate_microservices "${docker_llm_container_names[${i}]}"
|
||||||
|
echo "::endgroup::"
|
||||||
|
|
||||||
|
echo "::group::validate_megaservice"
|
||||||
validate_megaservice
|
validate_megaservice
|
||||||
|
echo "::endgroup::"
|
||||||
|
|
||||||
|
echo "::group::validate_gradio"
|
||||||
validate_gradio
|
validate_gradio
|
||||||
|
echo "::endgroup::"
|
||||||
|
|
||||||
stop_docker "${docker_compose_profiles[${i}]}"
|
stop_docker "${docker_compose_profiles[${i}]}"
|
||||||
sleep 5s
|
sleep 5s
|
||||||
done
|
done
|
||||||
|
|
||||||
echo y | docker system prune
|
docker system prune -f
|
||||||
}
|
}
|
||||||
|
|
||||||
main
|
main
|
||||||
|
|||||||
@@ -17,19 +17,13 @@ ip_address=$(hostname -I | awk '{print $1}')
|
|||||||
|
|
||||||
function build_docker_images() {
|
function build_docker_images() {
|
||||||
opea_branch=${opea_branch:-"main"}
|
opea_branch=${opea_branch:-"main"}
|
||||||
# If the opea_branch isn't main, replace the git clone branch in Dockerfile.
|
|
||||||
if [[ "${opea_branch}" != "main" ]]; then
|
|
||||||
cd $WORKPATH
|
|
||||||
OLD_STRING="RUN git clone --depth 1 https://github.com/opea-project/GenAIComps.git"
|
|
||||||
NEW_STRING="RUN git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git"
|
|
||||||
find . -type f -name "Dockerfile*" | while read -r file; do
|
|
||||||
echo "Processing file: $file"
|
|
||||||
sed -i "s|$OLD_STRING|$NEW_STRING|g" "$file"
|
|
||||||
done
|
|
||||||
fi
|
|
||||||
|
|
||||||
cd $WORKPATH/docker_image_build
|
cd $WORKPATH/docker_image_build
|
||||||
git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git
|
git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git
|
||||||
|
pushd GenAIComps
|
||||||
|
echo "GenAIComps test commit is $(git rev-parse HEAD)"
|
||||||
|
docker build --no-cache -t ${REGISTRY}/comps-base:${TAG} --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile .
|
||||||
|
popd && sleep 1s
|
||||||
|
|
||||||
echo "Build all the images with --no-cache, check docker_image_build.log for details..."
|
echo "Build all the images with --no-cache, check docker_image_build.log for details..."
|
||||||
service_list="codegen codegen-ui llm-textgen"
|
service_list="codegen codegen-ui llm-textgen"
|
||||||
@@ -164,18 +158,35 @@ function stop_docker() {
|
|||||||
|
|
||||||
function main() {
|
function main() {
|
||||||
|
|
||||||
|
echo "::group::stop_docker"
|
||||||
stop_docker
|
stop_docker
|
||||||
|
echo "::endgroup::"
|
||||||
|
|
||||||
|
echo "::group::build_docker_images"
|
||||||
if [[ "$IMAGE_REPO" == "opea" ]]; then build_docker_images; fi
|
if [[ "$IMAGE_REPO" == "opea" ]]; then build_docker_images; fi
|
||||||
|
echo "::endgroup::"
|
||||||
|
|
||||||
|
echo "::group::start_services"
|
||||||
start_services
|
start_services
|
||||||
|
echo "::endgroup::"
|
||||||
|
|
||||||
|
echo "::group::validate_microservices"
|
||||||
validate_microservices
|
validate_microservices
|
||||||
validate_megaservice
|
echo "::endgroup::"
|
||||||
validate_frontend
|
|
||||||
|
|
||||||
|
echo "::group::validate_megaservice"
|
||||||
|
validate_megaservice
|
||||||
|
echo "::endgroup::"
|
||||||
|
|
||||||
|
echo "::group::validate_frontend"
|
||||||
|
validate_frontend
|
||||||
|
echo "::endgroup::"
|
||||||
|
|
||||||
|
echo "::group::stop_docker"
|
||||||
stop_docker
|
stop_docker
|
||||||
echo y | docker system prune
|
echo "::endgroup::"
|
||||||
cd $WORKPATH
|
|
||||||
|
docker system prune -f
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -27,19 +27,13 @@ export no_proxy=${no_proxy},${ip_address}
|
|||||||
|
|
||||||
function build_docker_images() {
|
function build_docker_images() {
|
||||||
opea_branch=${opea_branch:-"main"}
|
opea_branch=${opea_branch:-"main"}
|
||||||
# If the opea_branch isn't main, replace the git clone branch in Dockerfile.
|
|
||||||
if [[ "${opea_branch}" != "main" ]]; then
|
|
||||||
cd $WORKPATH
|
|
||||||
OLD_STRING="RUN git clone --depth 1 https://github.com/opea-project/GenAIComps.git"
|
|
||||||
NEW_STRING="RUN git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git"
|
|
||||||
find . -type f -name "Dockerfile*" | while read -r file; do
|
|
||||||
echo "Processing file: $file"
|
|
||||||
sed -i "s|$OLD_STRING|$NEW_STRING|g" "$file"
|
|
||||||
done
|
|
||||||
fi
|
|
||||||
|
|
||||||
cd $WORKPATH/docker_image_build
|
cd $WORKPATH/docker_image_build
|
||||||
git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git
|
git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git
|
||||||
|
pushd GenAIComps
|
||||||
|
echo "GenAIComps test commit is $(git rev-parse HEAD)"
|
||||||
|
docker build --no-cache -t ${REGISTRY}/comps-base:${TAG} --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile .
|
||||||
|
popd && sleep 1s
|
||||||
|
|
||||||
git clone https://github.com/vllm-project/vllm.git && cd vllm
|
git clone https://github.com/vllm-project/vllm.git && cd vllm
|
||||||
VLLM_VER="v0.8.3"
|
VLLM_VER="v0.8.3"
|
||||||
@@ -256,17 +250,28 @@ function main() {
|
|||||||
for ((i = 0; i < len_profiles; i++)); do
|
for ((i = 0; i < len_profiles; i++)); do
|
||||||
echo "Process [${i}]: ${docker_compose_profiles[$i]}, ${docker_llm_container_names[${i}]}"
|
echo "Process [${i}]: ${docker_compose_profiles[$i]}, ${docker_llm_container_names[${i}]}"
|
||||||
docker ps -a
|
docker ps -a
|
||||||
start_services "${docker_compose_profiles[${i}]}" "${docker_llm_container_names[${i}]}"
|
|
||||||
|
|
||||||
|
echo "::group::start_services"
|
||||||
|
start_services "${docker_compose_profiles[${i}]}" "${docker_llm_container_names[${i}]}"
|
||||||
|
echo "::endgroup::"
|
||||||
|
|
||||||
|
echo "::group::validate_microservices"
|
||||||
validate_microservices "${docker_llm_container_names[${i}]}"
|
validate_microservices "${docker_llm_container_names[${i}]}"
|
||||||
|
echo "::endgroup::"
|
||||||
|
|
||||||
|
echo "::group::validate_megaservice"
|
||||||
validate_megaservice
|
validate_megaservice
|
||||||
|
echo "::endgroup::"
|
||||||
|
|
||||||
|
echo "::group::validate_gradio"
|
||||||
validate_gradio
|
validate_gradio
|
||||||
|
echo "::endgroup::"
|
||||||
|
|
||||||
stop_docker "${docker_compose_profiles[${i}]}"
|
stop_docker "${docker_compose_profiles[${i}]}"
|
||||||
sleep 5s
|
sleep 5s
|
||||||
done
|
done
|
||||||
|
|
||||||
echo y | docker system prune
|
docker system prune -f
|
||||||
}
|
}
|
||||||
|
|
||||||
main
|
main
|
||||||
|
|||||||
@@ -17,19 +17,13 @@ ip_address=$(hostname -I | awk '{print $1}')
|
|||||||
|
|
||||||
function build_docker_images() {
|
function build_docker_images() {
|
||||||
opea_branch=${opea_branch:-"main"}
|
opea_branch=${opea_branch:-"main"}
|
||||||
# If the opea_branch isn't main, replace the git clone branch in Dockerfile.
|
|
||||||
if [[ "${opea_branch}" != "main" ]]; then
|
|
||||||
cd $WORKPATH
|
|
||||||
OLD_STRING="RUN git clone --depth 1 https://github.com/opea-project/GenAIComps.git"
|
|
||||||
NEW_STRING="RUN git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git"
|
|
||||||
find . -type f -name "Dockerfile*" | while read -r file; do
|
|
||||||
echo "Processing file: $file"
|
|
||||||
sed -i "s|$OLD_STRING|$NEW_STRING|g" "$file"
|
|
||||||
done
|
|
||||||
fi
|
|
||||||
|
|
||||||
cd $WORKPATH/docker_image_build
|
cd $WORKPATH/docker_image_build
|
||||||
git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git
|
git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git
|
||||||
|
pushd GenAIComps
|
||||||
|
echo "GenAIComps test commit is $(git rev-parse HEAD)"
|
||||||
|
docker build --no-cache -t ${REGISTRY}/comps-base:${TAG} --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile .
|
||||||
|
popd && sleep 1s
|
||||||
|
|
||||||
echo "Build all the images with --no-cache, check docker_image_build.log for details..."
|
echo "Build all the images with --no-cache, check docker_image_build.log for details..."
|
||||||
service_list="vllm-rocm llm-textgen codegen codegen-ui"
|
service_list="vllm-rocm llm-textgen codegen codegen-ui"
|
||||||
@@ -164,17 +158,35 @@ function stop_docker() {
|
|||||||
|
|
||||||
function main() {
|
function main() {
|
||||||
|
|
||||||
|
echo "::group::stop_docker"
|
||||||
stop_docker
|
stop_docker
|
||||||
|
echo "::endgroup::"
|
||||||
|
|
||||||
|
echo "::group::build_docker_images"
|
||||||
if [[ "$IMAGE_REPO" == "opea" ]]; then build_docker_images; fi
|
if [[ "$IMAGE_REPO" == "opea" ]]; then build_docker_images; fi
|
||||||
|
echo "::endgroup::"
|
||||||
|
|
||||||
|
echo "::group::start_services"
|
||||||
start_services
|
start_services
|
||||||
|
echo "::endgroup::"
|
||||||
|
|
||||||
|
echo "::group::validate_microservices"
|
||||||
validate_microservices
|
validate_microservices
|
||||||
validate_megaservice
|
echo "::endgroup::"
|
||||||
validate_frontend
|
|
||||||
|
|
||||||
|
echo "::group::validate_megaservice"
|
||||||
|
validate_megaservice
|
||||||
|
echo "::endgroup::"
|
||||||
|
|
||||||
|
echo "::group::validate_frontend"
|
||||||
|
validate_frontend
|
||||||
|
echo "::endgroup::"
|
||||||
|
|
||||||
|
echo "::group::stop_docker"
|
||||||
stop_docker
|
stop_docker
|
||||||
echo y | docker system prune
|
echo "::endgroup::"
|
||||||
cd $WORKPATH
|
|
||||||
|
docker system prune -f
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -1,8 +1,9 @@
|
|||||||
# Copyright (C) 2024 Intel Corporation
|
# Copyright (C) 2024 Intel Corporation
|
||||||
# SPDX-License-Identifier: Apache-2.0
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
|
ARG IMAGE_REPO=opea
|
||||||
ARG BASE_TAG=latest
|
ARG BASE_TAG=latest
|
||||||
FROM opea/comps-base:$BASE_TAG
|
FROM $IMAGE_REPO/comps-base:$BASE_TAG
|
||||||
|
|
||||||
COPY ./code_translation.py $HOME/code_translation.py
|
COPY ./code_translation.py $HOME/code_translation.py
|
||||||
|
|
||||||
|
|||||||
@@ -22,12 +22,11 @@ This Code Translation use case demonstrates Text Generation Inference across mul
|
|||||||
|
|
||||||
The table below lists currently available deployment options. They outline in detail the implementation of this example on selected hardware.
|
The table below lists currently available deployment options. They outline in detail the implementation of this example on selected hardware.
|
||||||
|
|
||||||
| Category | Deployment Option | Description |
|
| Category | Deployment Option | Description |
|
||||||
| ---------------------- | -------------------- | ----------------------------------------------------------------- |
|
| ---------------------- | -------------------- | --------------------------------------------------------------------------- |
|
||||||
| On-premise Deployments | Docker compose | [CodeTrans deployment on Xeon](./docker_compose/intel/cpu/xeon) |
|
| On-premise Deployments | Docker compose | [CodeTrans deployment on Xeon](./docker_compose/intel/cpu/xeon/README.md) |
|
||||||
| | | [CodeTrans deployment on Gaudi](./docker_compose/intel/hpu/gaudi) |
|
| | | [CodeTrans deployment on Gaudi](./docker_compose/intel/hpu/gaudi/README.md) |
|
||||||
| | | [CodeTrans deployment on AMD ROCm](./docker_compose/amd/gpu/rocm) |
|
| | | [CodeTrans deployment on AMD ROCm](./docker_compose/amd/gpu/rocm/README.md) |
|
||||||
| | Kubernetes | [Helm Charts](./kubernetes/helm) |
|
| | Kubernetes | [Helm Charts](./kubernetes/helm/README.md) |
|
||||||
| | | [GMC](./kubernetes/gmc) |
|
| | Azure | Work-in-progress |
|
||||||
| | Azure | Work-in-progress |
|
| | Intel Tiber AI Cloud | Work-in-progress |
|
||||||
| | Intel Tiber AI Cloud | Work-in-progress |
|
|
||||||
|
|||||||
@@ -44,3 +44,38 @@ Some HuggingFace resources, such as some models, are only accessible if the deve
|
|||||||
|
|
||||||
2. (Docker only) If all microservices work well but the service is unreachable, check port ${host_ip}:7777; it may already be allocated by another process, in which case you can modify the port mapping in `compose.yaml`.
|
2. (Docker only) If all microservices work well but the service is unreachable, check port ${host_ip}:7777; it may already be allocated by another process, in which case you can modify the port mapping in `compose.yaml`.
|
||||||
3. (Docker only) If you get errors like "The container name is in use", change container name in `compose.yaml`.
|
3. (Docker only) If you get errors like "The container name is in use", change container name in `compose.yaml`.
|
||||||
|
|
||||||
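An illustrative way to check item 2 above for a port conflict (port `7777` is taken from the text; the commands assume a Linux host):

```bash
# List any container already publishing port 7777.
docker ps --filter "publish=7777" --format "{{.Names}}: {{.Ports}}"
# Check whether any process is listening on 7777.
ss -tlnp | grep ':7777' || echo "port 7777 is free"
```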
|
## Monitoring OPEA Services with Prometheus and Grafana Dashboard
|
||||||
|
|
||||||
|
OPEA microservice deployment can easily be monitored through Grafana dashboards using data collected via Prometheus. Follow the [README](https://github.com/opea-project/GenAIEval/blob/main/evals/benchmark/grafana/README.md) to setup Prometheus and Grafana servers and import dashboards to monitor the OPEA services.
|
||||||
|
|
||||||
|

|
||||||
|

|
||||||
|
|
||||||
|
## Tracing with OpenTelemetry and Jaeger
|
||||||
|
|
||||||
|
> NOTE: This feature is disabled by default. Please use the `compose.telemetry.yaml` file to enable it.
|
||||||
|
|
||||||
|
OPEA microservice and [TGI](https://huggingface.co/docs/text-generation-inference/en/index)/[TEI](https://huggingface.co/docs/text-embeddings-inference/en/index) serving can easily be traced through [Jaeger](https://www.jaegertracing.io/) dashboards in conjunction with [OpenTelemetry](https://opentelemetry.io/) Tracing feature. Follow the [README](https://github.com/opea-project/GenAIComps/tree/main/comps/cores/telemetry#tracing) to trace additional functions if needed.
|
||||||
|
|
||||||
|
Tracing data is exported to Jaeger at http://{EXTERNAL_IP}:4318/v1/traces.
|
||||||
|
Users can also get the external IP via the command below.
|
||||||
|
|
||||||
|
```bash
|
||||||
|
ip route get 8.8.8.8 | grep -oP 'src \K[^ ]+'
|
||||||
|
```
|
||||||
|
|
||||||
|
Access the Jaeger dashboard UI at http://{EXTERNAL_IP}:16686
|
||||||
|
|
||||||
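A quick reachability sketch for the endpoints mentioned above (the OTLP traces endpoint expects POST, so a plain GET may return 405):

```bash
curl -s -o /dev/null -w "Jaeger UI: %{http_code}\n" http://${EXTERNAL_IP}:16686
curl -s -o /dev/null -w "OTLP traces endpoint: %{http_code}\n" http://${EXTERNAL_IP}:4318/v1/traces
```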
|
For TGI serving on Gaudi, users could see different services like opea, TEI and TGI.
|
||||||
|

|
||||||
|
|
||||||
|
Here is a screenshot for one tracing of TGI serving request.
|
||||||
|

|
||||||
|
|
||||||
|
There are also OPEA-related tracings. Users can understand the time breakdown of each service request by looking into each `opea:schedule` operation.
|
||||||
|

|
||||||
|
|
||||||
|
There may be asynchronous functions such as `llm/MicroService_asyn_generate`; users need to check the trace of the asynchronous function in another operation, such as
|
||||||
|
`opea:llm_generate_stream`.
|
||||||
|

|
||||||
|
|||||||
Binary file not shown.
|
Before Width: | Height: | Size: 120 KiB After Width: | Height: | Size: 90 KiB |
BIN CodeTrans/assets/img/example_dashboards.png (new file)
Binary file not shown.
|
After Width: | Height: | Size: 100 KiB |
BIN CodeTrans/assets/img/tgi_dashboard.png (new file)
Binary file not shown.
|
After Width: | Height: | Size: 414 KiB |
@@ -46,7 +46,7 @@ export http_proxy="Your_HTTP_Proxy" # http proxy if any
|
|||||||
export https_proxy="Your_HTTPs_Proxy" # https proxy if any
|
export https_proxy="Your_HTTPs_Proxy" # https proxy if any
|
||||||
export no_proxy=localhost,127.0.0.1,$host_ip # additional no proxies if needed
|
export no_proxy=localhost,127.0.0.1,$host_ip # additional no proxies if needed
|
||||||
export NGINX_PORT=${your_nginx_port} # your usable port for nginx, 80 for example
|
export NGINX_PORT=${your_nginx_port} # your usable port for nginx, 80 for example
|
||||||
source ./set_env.sh
|
source docker_compose/intel/set_env.sh
|
||||||
```
|
```
|
||||||
|
|
||||||
Consult the section on [CodeTrans Service configuration](#codetrans-configuration) for information on how service specific configuration parameters affect deployments.
|
Consult the section on [CodeTrans Service configuration](#codetrans-configuration) for information on how service specific configuration parameters affect deployments.
|
||||||
|
|||||||
Some files were not shown because too many files have changed in this diff.